Context Navigation

source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 25610

Last change on this file since 25610 was 25610, checked in by sjm84, 12 years ago
First phase of making GLI format editor properly indented
Property svn:keywords set to `Author Date Id Revision`
File size: 29.4 KB

Line
1	package org.greenstone.gatherer.util;
2
3	import java.io.*;
4	import java.net.*;
5	import java.util.*;
6	import org.apache.xerces.parsers.*;
7	import org.apache.xml.serialize.*;
8	import org.greenstone.gatherer.DebugStream;
9	import org.w3c.dom.*;
10	import org.xml.sax.*;
11
12	import java.io.FileReader;
13	import java.io.IOException;
14	import java.io.StringReader;
15
16	// SAX
17	import org.xml.sax.XMLReader;
18	import org.xml.sax.SAXException;
19	import org.xml.sax.SAXParseException;
20	import org.xml.sax.helpers.DefaultHandler;
21	import org.xml.sax.InputSource;
22
23	// JAXP
24	import javax.xml.parsers.FactoryConfigurationError;
25	import javax.xml.parsers.ParserConfigurationException;
26	import javax.xml.parsers.SAXParser;
27	import javax.xml.parsers.SAXParserFactory;
28
29	/** This class is a static class containing useful XML functions */
30	public class XMLTools
31	{
32	/** extracts the text out of a node */
33	public static Node getNodeTextNode(Element param)
34	{
35	param.normalize();
36	Node n = param.getFirstChild();
37	while (n != null && n.getNodeType() != Node.TEXT_NODE)
38	{
39	n = n.getNextSibling();
40	}
41	return n;
42	}
43
44	/** extracts the text out of a node */
45	public static String getNodeText(Element param)
46	{
47	Node text_node = getNodeTextNode(param);
48	if (text_node == null)
49	{
50	return "";
51	}
52	return text_node.getNodeValue();
53	}
54
55	public static void setNodeText(Element elem, String text)
56	{
57	Node old_text_node = getNodeTextNode(elem);
58	if (old_text_node != null)
59	{
60	elem.removeChild(old_text_node);
61	}
62	Text t = elem.getOwnerDocument().createTextNode(text);
63	elem.appendChild(t);
64	}
65
66	/** returns the (first) child element with the given name */
67	public static Node getChildByTagName(Node n, String name)
68	{
69
70	Node child = n.getFirstChild();
71	while (child != null)
72	{
73	if (child.getNodeName().equals(name))
74	{
75	return child;
76	}
77	child = child.getNextSibling();
78	}
79	return null; //not found
80	}
81
82	/**
83	* returns the (nth) child element with the given name index numbers start
84	* at 0
85	*/
86	public static Node getChildByTagNameIndexed(Node n, String name, int index)
87	{
88	if (index == -1)
89	{
90	return getChildByTagName(n, name);
91	}
92	int count = 0;
93	Node child = n.getFirstChild();
94	while (child != null)
95	{
96	if (child.getNodeName().equals(name))
97	{
98	if (count == index)
99	{
100	return child;
101	}
102	else
103	{
104	count++;
105	}
106	}
107	child = child.getNextSibling();
108	}
109	return null; //not found
110	}
111
112	/**
113	* returns the element parent/node_name[@attribute_name='attribute_value']
114	*/
115	public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
116	{
117
118	NodeList children = parent.getChildNodes();
119	for (int i = 0; i < children.getLength(); i++)
120	{
121	Node child = children.item(i);
122	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
123	if (child.getNodeName().equals(node_name))
124	{
125	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
126	return (Element) child;
127	}
128	}
129	// not found
130	return null;
131	}
132
133	/**
134	* returns a list of elements
135	* parent/node_name[@attribute_name='attribute_value']
136	*/
137	public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
138	{
139	ArrayList elements = new ArrayList();
140	NodeList children = parent.getChildNodes();
141	for (int i = 0; i < children.getLength(); i++)
142	{
143	//System.out.println("getNamedElementList");
144	Node child = children.item(i);
145	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
146	if (child.getNodeName().equals(node_name))
147	{
148	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
149	elements.add((Element) child);
150	}
151	}
152	// not found
153	if (elements.size() == 0)
154	{
155	elements = null;
156	}
157	return elements;
158	}
159
160	public static void copyAllChildren(Element to, Element from)
161	{
162
163	Document to_doc = to.getOwnerDocument();
164	Node child = from.getFirstChild();
165	while (child != null)
166	{
167	to.appendChild(to_doc.importNode(child, true));
168	child = child.getNextSibling();
169	}
170	}
171
172	/** Duplicates an element */
173	public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
174	{
175	return duplicateElementNS(owner, element, null, with_attributes);
176	}
177
178	/** Duplicates an element */
179	public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
180	{
181	Element duplicate;
182	if (namespace_uri == null)
183	{
184	duplicate = owner.createElement(element.getTagName());
185	}
186	else
187	{
188	duplicate = owner.createElementNS(namespace_uri, element.getTagName());
189	}
190	// Copy element attributes
191	if (with_attributes)
192	{
193	NamedNodeMap attributes = element.getAttributes();
194	for (int i = 0; i < attributes.getLength(); i++)
195	{
196	Node attribute = attributes.item(i);
197	duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
198	}
199	}
200
201	// Copy element children
202	NodeList children = element.getChildNodes();
203	for (int i = 0; i < children.getLength(); i++)
204	{
205	Node child = children.item(i);
206	duplicate.appendChild(owner.importNode(child, true));
207	}
208
209	return duplicate;
210	}
211
212	/** Remove all of the child nodes from a certain node. */
213	static final public void clear(Node node)
214	{
215	while (node.hasChildNodes())
216	{
217	node.removeChild(node.getFirstChild());
218	}
219	}
220
221	static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
222	{
223	ArrayList child_elements = new ArrayList();
224
225	NodeList children_nodelist = parent_element.getChildNodes();
226	for (int i = 0; i < children_nodelist.getLength(); i++)
227	{
228	Node child_node = children_nodelist.item(i);
229	if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
230	{
231	child_elements.add(child_node);
232	}
233	}
234
235	return child_elements;
236	}
237
238	static public String getElementTextValue(Element element)
239	{
240	// Find the first text node child
241	NodeList children_nodelist = element.getChildNodes();
242	for (int i = 0; i < children_nodelist.getLength(); i++)
243	{
244	Node child_node = children_nodelist.item(i);
245	if (child_node.getNodeType() == Node.TEXT_NODE)
246	{
247	return child_node.getNodeValue();
248	}
249	}
250
251	// None found
252	return "";
253	}
254
255	/**
256	* Method to retrieve the value of a given node.
257	*
258	* @param element
259	* The <strong>Element</strong> whose value we wish to find. Soon
260	* to be deprecated!
261	*/
262	static final public String getValue(Node element)
263	{
264	if (element == null)
265	{
266	return "";
267	}
268	// If we've been given a subject node first retrieve its value node.
269	if (element.getNodeName().equals("Subject"))
270	{
271	element = getNodeFromNamed(element, "Value");
272	}
273	// If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
274	if (element != null && element.hasChildNodes())
275	{
276	StringBuffer text_buffer = new StringBuffer();
277	NodeList text_nodes = element.getChildNodes();
278	for (int i = 0; i < text_nodes.getLength(); i++)
279	{
280	Node possible_text = text_nodes.item(i);
281	if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
282	{
283	text_buffer.append(possible_text.getNodeValue());
284	}
285	}
286	return text_buffer.toString();
287	}
288	return "";
289	}
290
291	/**
292	* Method to retrieve from the node given, a certain child node with the
293	* specified name.
294	*
295	* @param parent
296	* The <strong>Node</strong> whose children should be searched.
297	* @param name
298	* The required nodes name as a <strong>String</strong>.
299	* @return The requested <strong>Node</strong> if it is found, <i>null</i>
300	* otherwise. Soon to be deprecated!
301	*/
302	static final public Node getNodeFromNamed(Node parent, String name)
303	{
304	Node child = null;
305	for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
306	{
307	if (i.getNodeName().equals(name))
308	{
309	child = i;
310	}
311	}
312	return child;
313	}
314
315	static final public String WELLFORMED = "well-formed !";
316	static final public String NOTWELLFORMED = "not well-formed";
317	static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
318	static final private String FOOTER = "</collectionConfig>";
319
320	public static String parse(String xml_str)
321	{
322	String validation_msg = WELLFORMED;
323	xml_str = HEADER + xml_str + FOOTER;
324	try
325	{
326	SAXParserFactory factory = SAXParserFactory.newInstance();
327	factory.setNamespaceAware(true);
328	//factory.setValidating (true);
329	SAXParser parser = factory.newSAXParser();
330	InputSource iSource = new InputSource(new StringReader(xml_str));
331	// parser.parse (iSource, new DefaultHandler ());
332
333	org.xml.sax.XMLReader reader = parser.getXMLReader();
334	reader.setContentHandler(new DefaultHandler());
335	reader.setErrorHandler(new DefaultHandler());
336	reader.parse(iSource);
337	}
338	catch (FactoryConfigurationError e)
339	{
340	validation_msg = "unable to get a document builder factory";
341	}
342	catch (ParserConfigurationException e)
343	{
344	validation_msg = "unable to configure parser";
345	}
346	catch (SAXParseException e)
347	{
348	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
349	}
350	catch (SAXException e)
351	{
352	validation_msg += " Fatal error: " + e.toString();
353	}
354	catch (IOException e)
355	{
356	validation_msg = "Unable to read the input, i/o error";
357	}
358
359	return validation_msg;
360	}
361
362	//In this method, the parsed string xml_str is not wrapped by the header and footer strings.
363	public static String parseDOM(String xml_str)
364	{
365	String validation_msg = WELLFORMED;
366
367	try
368	{
369	SAXParserFactory factory = SAXParserFactory.newInstance();
370	factory.setNamespaceAware(true);
371	//factory.setValidating (true);
372	SAXParser parser = factory.newSAXParser();
373	InputSource iSource = new InputSource(new StringReader(xml_str));
374	// parser.parse (iSource, new DefaultHandler ());
375
376	org.xml.sax.XMLReader reader = parser.getXMLReader();
377	reader.setContentHandler(new DefaultHandler());
378	reader.setErrorHandler(new DefaultHandler());
379	reader.parse(iSource);
380	}
381	catch (FactoryConfigurationError e)
382	{
383	validation_msg = "unable to get a document builder factory";
384	}
385	catch (ParserConfigurationException e)
386	{
387	validation_msg = "unable to configure parser";
388	}
389	catch (SAXParseException e)
390	{
391	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
392	}
393	catch (SAXException e)
394	{
395	validation_msg += " " + e.toString();
396	}
397	catch (IOException e)
398	{
399	validation_msg = "Unable to read the input, i/o error";
400	}
401
402	return validation_msg;
403	}
404
405	public static String parse(File xml_file)
406	{
407	String validation_msg = WELLFORMED;
408
409	try
410	{
411	SAXParserFactory factory = SAXParserFactory.newInstance();
412	factory.setNamespaceAware(true);
413	//factory.setValidating (true);
414	SAXParser parser = factory.newSAXParser();
415	FileReader r = new FileReader(xml_file);
416	InputSource iSource = new InputSource(r);
417	XMLReader reader = parser.getXMLReader();
418	reader.setContentHandler(new DefaultHandler());
419	reader.setErrorHandler(new DefaultHandler());
420	reader.parse(iSource);
421	}
422	catch (FactoryConfigurationError e)
423	{
424	validation_msg = "unable to get a document builder factory";
425	}
426	catch (ParserConfigurationException e)
427	{
428	validation_msg = "unable to configure parser";
429	}
430	catch (SAXParseException e)
431	{
432	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
433	}
434	catch (SAXException e)
435	{
436	validation_msg += " Fatal error: " + e.toString();
437	}
438	catch (IOException e)
439	{
440	validation_msg = "Unable to read the input, i/o error";
441	}
442
443	return validation_msg;
444	}
445
446	/** Returns a string of the location. */
447	private static String getLocationString(SAXParseException ex)
448	{
449	StringBuffer str = new StringBuffer();
450
451	String systemId = ex.getSystemId();
452	if (systemId != null)
453	{
454	int index = systemId.lastIndexOf('/');
455	if (index != -1)
456	systemId = systemId.substring(index + 1);
457	str.append(systemId);
458	}
459	str.append("(line ");
460	str.append(ex.getLineNumber() - 1);
461	str.append(", column ");
462	str.append(ex.getColumnNumber());
463	str.append("): ");
464
465	return str.toString();
466
467	} // getLocationString(SAXParseException):String
468
469	/** Parse an XML document from a given file path */
470	static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
471	{
472	if (use_class_loader == true)
473	{
474	InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
475	if (is != null)
476	{
477	return parseXML(is);
478	}
479	}
480
481	// Try the file outside the classes directory
482	return parseXMLFile(new File(xml_file_path));
483	}
484
485	/** Parse an XML document from a given file */
486	static public Document parseXMLFile(File xml_file)
487	{
488	// No file? No point trying!
489	if (xml_file.exists() == false)
490	{
491	return null;
492	}
493
494	try
495	{
496	return parseXML(new FileInputStream(xml_file));
497	}
498	catch (Exception exception)
499	{
500	DebugStream.printStackTrace(exception);
501	return null;
502	}
503	}
504
505	/** Parse an XML document from a given input stream */
506	static public Document parseXML(InputStream xml_input_stream)
507	{
508	Document document = null;
509
510	try
511	{
512	InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
513	document = parseXML(isr);
514	isr.close();
515	xml_input_stream.close();
516	}
517	catch (Exception exception)
518	{
519	DebugStream.printStackTrace(exception);
520	}
521
522	return document;
523	}
524
525	/** Parse an XML document from a given reader */
526	static public Document parseXML(Reader xml_reader)
527	{
528	Document document = null;
529
530	// If debugging, the following will store the XML contents to be parsed,
531	// which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
532	String xmlContents = "";
533
534	try
535	{
536	Reader reader = null;
537
538	// (1) By default, GLI will remove any contents preceeding (and invalidating)
539	// the XML and present these lines separately to the user
540	if (!DebugStream.isDebuggingEnabled())
541	{
542	try
543	{
544	reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
545	}
546	catch (Exception e)
547	{
548	System.err.println("Exception while wrapping the reader in parseXML(Reader)");
549	e.printStackTrace();
550	}
551	}
552
553	// (2) If we are running GLI in debug mode:
554	// In case parsing exceptions are thrown (SAX Exceptions), we want to get some
555	// idea of where things went wrong. This will print the "XML" contents to either
556	// system.out (if debugging is off) or to the DebugStream otherwise.
557	// We need to read the XML twice to know the line where things went wrong, so
558	// do the additional reading only if we're debugging
559	else
560	{
561	StringBuffer buf = new StringBuffer();
562	char[] buffer = new char[500];
563	int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
564	while (numCharsRead != -1)
565	{
566	buf.append(buffer, 0, numCharsRead);
567	numCharsRead = xml_reader.read(buffer, 0, buffer.length);
568	}
569	xmlContents = buf.toString();
570	xml_reader.close(); // closing the old Reader
571	xml_reader = null;
572	buffer = null;
573	buf = null;
574	// we need a Reader to parse the same contents as the Reader that was just closed
575	reader = new BufferedReader(new StringReader(xmlContents));
576	//System.err.println("xmlContents:\n" + xmlContents);
577	}
578
579	// (2) The actual XML parsing
580	InputSource isc = new InputSource(reader);
581	DOMParser parser = new DOMParser();
582	parser.setFeature("http://xml.org/sax/features/validation", false);
583	parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
584	// May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
585	parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
586	parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
587	parser.parse(isc);
588	document = parser.getDocument();
589
590	}
591	catch (SAXParseException e)
592	{
593	showXMLParseFailureLine(e, xmlContents);
594	}
595	catch (SAXException exception)
596	{
597	System.err.println("SAX exception: " + exception.getMessage());
598	if (DebugStream.isDebuggingEnabled())
599	{
600	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
601	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
602	DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
603	System.exit(-1);
604	}
605	// else, not running in debug mode, so don't exit after exception
606	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
607	DebugStream.printStackTrace(exception);
608	}
609	catch (Exception exception)
610	{
611	DebugStream.printStackTrace(exception);
612	}
613
614	return document;
615	}
616
617	/**
618	* Displays the line (string) where the SAXParseException occurred, given a
619	* String of the entire xml that was being parsed and the SAXParseException
620	* object that was caught. The messages are printed to DebugStream, so run
621	* GLI/FLI with -debug to view this output.
622	*
623	* @param xmlContents
624	* is the entire xml that was being parsed when the exception
625	* occurred
626	* @param e
627	* is the SAXParseException object that was thrown upon parsing
628	* the xmlContents.
629	*/
630	public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
631	{
632
633	// There should be no characters at all that preceed the <?xml>... bit.
634	// The first check is for starting spaces:
635	if (xmlContents.startsWith("\n") \|\| xmlContents.startsWith(" ") \|\| xmlContents.startsWith("\t"))
636	{
637	DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
638	DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
639	return; // nothing more to do, first error identified
640	}
641
642	// the actual line (String literal) where parsing failed and the SAXParseException occurred.
643	String line = "";
644	int linenumber = e.getLineNumber();
645	DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
646	if (DebugStream.isDebuggingEnabled())
647	{
648	if (linenumber != -1)
649	{
650	String[] lines = xmlContents.split("\n");
651	if (lines.length > 0)
652	{
653	DebugStream.println(" (number of lines: " + lines.length + ")");
654	if (lines.length >= linenumber)
655	{
656	line = lines[linenumber - 1];
657	}
658	else
659	{ // error is past the last line
660	line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
661	}
662	}
663	else
664	{
665	DebugStream.print("\n");
666	}
667	lines = null;
668
669	DebugStream.println("The parsing error occurred on this line:\n*********START\n" + line + "\n*********END");
670	DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
671
672	// Uncomment if you want to print out the entire contents of the XML doc:
673	//DebugStream.println("\n\nThis was the XML:\n*********START\n"
674	// + xmlContents + "\n************END\n");
675	}
676	else
677	{ // no particular line number, print out all the xml so debugger can inspect it
678	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
679	}
680	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
681	DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
682	System.exit(-1);
683	}
684	else
685	{ // not running in debug mode
686	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
687	}
688	}
689
690	static public StringBuffer readXMLStream(InputStream input_stream)
691	{
692	StringBuffer xml = new StringBuffer("");
693
694	try
695	{
696	InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
697	BufferedReader buffered_in = new BufferedReader(isr);
698
699	String line = "";
700	boolean xml_content = false;
701	while ((line = buffered_in.readLine()) != null)
702	{
703	if (xml_content)
704	{
705	xml.append(line);
706	xml.append("\n");
707	}
708	else if (line.trim().startsWith("<?xml"))
709	{
710	xml_content = true;
711	xml.append(line);
712	xml.append("\n");
713	}
714	else
715	{
716	System.err.println(line);
717	}
718	}
719	buffered_in = null;
720	}
721	catch (Exception error)
722	{
723	System.err.println("Failed when trying to parse XML stream");
724	error.printStackTrace();
725	}
726
727	return xml;
728	}
729
730	/**
731	* Removes characters that are invalid in XML (see
732	* http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
733	*/
734	static public String removeInvalidCharacters(String text)
735	{
736	char[] safe_characters = new char[text.length()];
737	int j = 0;
738
739	char[] raw_characters = new char[text.length()];
740	text.getChars(0, text.length(), raw_characters, 0);
741	for (int i = 0; i < raw_characters.length; i++)
742	{
743	char character = raw_characters[i];
744	if ((character >= 0x20 && character <= 0xD7FF) \|\| character == 0x09 \|\| character == 0x0A \|\| character == 0x0D \|\| (character >= 0xE000 && character <= 0xFFFD) \|\| (character >= 0x10000 && character <= 0x10FFFF))
745	{
746	safe_characters[j] = character;
747	j++;
748	}
749	}
750
751	return new String(safe_characters, 0, j);
752	}
753
754	static public void setElementTextValue(Element element, String text)
755	{
756	// Remove all text node children
757	NodeList children_nodelist = element.getChildNodes();
758	for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
759	{
760	Node child_node = children_nodelist.item(i);
761	if (child_node.getNodeType() == Node.TEXT_NODE)
762	{
763	element.removeChild(child_node);
764	}
765	}
766
767	// Add a new text node
768	if (text != null)
769	{
770	element.appendChild(element.getOwnerDocument().createTextNode(text));
771	}
772	}
773
774	/**
775	* Set the #text node value of some element.
776	*
777	* @param element
778	* the Element whose value we wish to set
779	* @param value
780	* the new value for the element as a String Soon to be
781	* deprecated!
782	*/
783	static final public void setValue(Element element, String value)
784	{
785	// Remove any existing child node(s)
786	clear(element);
787	// Add new text node.
788	if (value != null)
789	{
790	element.appendChild(element.getOwnerDocument().createTextNode(value));
791	}
792	}
793
794	static public void indentXML(Element elem, int depth)
795	{
796	Document doc = elem.getOwnerDocument();
797
798	String startIndentString = "\n";
799	for (int i = 0; i < depth; i++)
800	{
801	startIndentString += "\t";
802	}
803	Node startTextNode = doc.createTextNode(startIndentString);
804
805	String endIndentString = "\n";
806	for (int i = 0; i < depth - 1; i++)
807	{
808	endIndentString += "\t";
809	}
810	Node endTextNode = doc.createTextNode(endIndentString);
811
812	boolean found = false;
813	Node child = elem.getFirstChild();
814	while (child != null)
815	{
816	if (child.getNodeType() == Node.ELEMENT_NODE)
817	{
818	found = true;
819	break;
820	}
821	child = child.getNextSibling();
822	}
823
824	if (found)
825	{
826	elem.appendChild(endTextNode);
827	}
828
829	child = elem.getFirstChild();
830	while (child != null)
831	{
832	if (child.getNodeType() == Node.ELEMENT_NODE)
833	{
834	elem.insertBefore(startTextNode.cloneNode(false), child);
835	indentXML((Element) child, depth + 1);
836	}
837	child = child.getNextSibling();
838	}
839	}
840
841	/**
842	* Write an XML document to a given file with the text node of the specified
843	* element unescaped
844	*/
845	static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
846	{
847	indentXML(document.getDocumentElement(), 1);
848	try
849	{
850	OutputStream os = new FileOutputStream(xml_file);
851	// Create an output format for our document.
852	OutputFormat f = new OutputFormat(document);
853	f.setEncoding("UTF-8");
854	f.setIndenting(true);
855	f.setLineWidth(0); // Why isn't this working!
856	f.setPreserveSpace(true);
857	if (nonEscapingTagNames != null)
858	{
859	f.setNonEscapingElements(nonEscapingTagNames);
860	}
861	// Create the necessary writer stream for serialization.
862	OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
863	Writer w = new BufferedWriter(osw);
864	// Generate a new serializer from the above.
865	XMLSerializer s = new XMLSerializer(w, f);
866	s.asDOMSerializer();
867	// Finally serialize the document to file.
868	s.serialize(document);
869	// And close.
870	os.close();
871	}
872	catch (Exception exception)
873	{
874	DebugStream.printStackTrace(exception);
875	}
876	}
877
878	/** Write an XML document to a given file */
879	static public void writeXMLFile(File xml_file, Document document)
880	{
881	writeXMLFile(xml_file, document, null);
882	}
883
884	public static void printXMLNode(Node e)
885	{
886	printXMLNode(e, 0);
887	}
888
889	public static void printXMLNode(Node e, int depth)
890	{ //recursive method call using DOM API...
891
892	for (int i = 0; i < depth; i++)
893	System.out.print(' ');
894
895	if (e.getNodeType() == Node.TEXT_NODE)
896	{
897	//System.out.println("text") ;
898	if (e.getNodeValue() != "")
899	{
900	System.out.println(e.getNodeValue());
901	}
902	return;
903	}
904
905	System.out.print('<');
906	System.out.print(e.getNodeName());
907	NamedNodeMap attrs = e.getAttributes();
908	if (attrs != null)
909	{
910	for (int i = 0; i < attrs.getLength(); i++)
911	{
912	Node attr = attrs.item(i);
913	System.out.print(' ');
914	System.out.print(attr.getNodeName());
915	System.out.print("=\"");
916	System.out.print(attr.getNodeValue());
917	System.out.print('"');
918	}
919	}
920	NodeList children = e.getChildNodes();
921
922	if (children == null \|\| children.getLength() == 0)
923	System.out.println("/>");
924	else
925	{
926
927	System.out.println('>');
928
929	int len = children.getLength();
930	for (int i = 0; i < len; i++)
931	{
932	printXMLNode(children.item(i), depth + 1);
933	}
934
935	for (int i = 0; i < depth; i++)
936	System.out.print(' ');
937
938	System.out.println("</" + e.getNodeName() + ">");
939	}
940
941	}
942
943	public static String xmlNodeToString(Node e)
944	{
945	StringBuffer sb = new StringBuffer("");
946	xmlNodeToString(sb, e, true, "\t", 2);
947	return sb.toString();
948	}
949
950	public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
951	{
952	if (e.getNodeType() == Node.TEXT_NODE)
953	{
954	if (e.getNodeValue() != "")
955	{
956	String text = e.getNodeValue();
957	text = text.replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">").replaceAll("^[\\n\\r\\t\\s]*", "");
958	sb.append(text);
959	}
960	return;
961	}
962
963	if (e.getNodeType() == Node.COMMENT_NODE)
964	{
965	if (e.getNodeValue() != "")
966	{
967	sb.append("<!--" + e.getNodeValue() + "-->");
968	}
969	return;
970	}
971
972	if (indent)
973	{
974	for (int i = 0; i < depth; i++)
975	{
976	sb.append(indentString);
977	}
978	}
979
980	sb.append('<');
981	sb.append(e.getNodeName());
982	NamedNodeMap attrs = e.getAttributes();
983	if (attrs != null)
984	{
985	for (int i = 0; i < attrs.getLength(); i++)
986	{
987	Node attr = attrs.item(i);
988	sb.append(' ');
989	sb.append(attr.getNodeName());
990	sb.append("=\"");
991	sb.append(attr.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">"));
992	sb.append('"');
993	}
994	}
995	NodeList children = e.getChildNodes();
996
997	boolean hasElements = false;
998	boolean indentSwapped = false;
999	for (int i = 0; i < children.getLength(); i++)
1000	{
1001	if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1002	{
1003	hasElements = true;
1004	}
1005	if (children.item(i).getNodeType() == Node.TEXT_NODE && indent)
1006	{
1007	if (children.item(i).getNodeValue().matches("[^\\s]*"))
1008	{
1009	indentSwapped = true;
1010	indent = false;
1011	}
1012	}
1013	}
1014
1015	if (children == null \|\| children.getLength() == 0)
1016	{
1017	sb.append("/>");
1018
1019	if (indent)
1020	{
1021	sb.append("\n");
1022	}
1023	}
1024	else
1025	{
1026	sb.append(">");
1027	if (hasElements && indent)
1028	{
1029	sb.append("\n");
1030	}
1031
1032	int len = children.getLength();
1033	for (int i = 0; i < len; i++)
1034	{
1035	xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1036	}
1037
1038	if (indent)
1039	{
1040	for (int i = 0; i < depth; i++)
1041	{
1042	sb.append(indentString);
1043	}
1044	}
1045
1046	sb.append("</" + e.getNodeName() + ">");
1047
1048	if ((hasElements && indent) \|\| indentSwapped)
1049	{
1050	sb.append("\n");
1051	}
1052	}
1053	}
1054
1055	public static String xmlNodeToStringWithoutIndenting(Node e)
1056	{
1057	StringBuffer sb = new StringBuffer("");
1058	xmlNodeToStringWithoutNewline(sb, e, -1);
1059	return sb.toString();
1060	}
1061
1062	public static String xmlNodeToStringWithoutNewline(Node e)
1063	{
1064	StringBuffer sb = new StringBuffer("");
1065	xmlNodeToStringWithoutNewline(sb, e, 0);
1066	return sb.toString();
1067	}
1068
1069	private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1070	{
1071
1072	for (int i = 0; i < depth; i++)
1073	{
1074	sb.append(' ');
1075	}
1076
1077	if (e.getNodeType() == Node.TEXT_NODE)
1078	{
1079	if (e.getNodeValue() != "")
1080	{
1081	sb.append(e.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replace(">", ">"));
1082	}
1083	return;
1084	}
1085
1086	if (e.getNodeType() == Node.COMMENT_NODE)
1087	{
1088	if (e.getNodeValue() != "")
1089	{
1090	sb.append("<!--" + e.getNodeValue() + "-->");
1091	}
1092	return;
1093	}
1094
1095	sb.append('<');
1096	sb.append(e.getNodeName());
1097	NamedNodeMap attrs = e.getAttributes();
1098	if (attrs != null)
1099	{
1100	for (int i = 0; i < attrs.getLength(); i++)
1101	{
1102	Node attr = attrs.item(i);
1103	sb.append(' ');
1104	sb.append(attr.getNodeName());
1105	sb.append("=\"");
1106	sb.append(attr.getNodeValue());
1107	sb.append('"');
1108	}
1109	}
1110	NodeList children = e.getChildNodes();
1111
1112	if (children == null \|\| children.getLength() == 0)
1113	sb.append("/>");
1114	else
1115	{
1116
1117	sb.append(">");
1118
1119	int len = children.getLength();
1120	for (int i = 0; i < len; i++)
1121	{
1122	if (depth >= 0)
1123	{
1124	xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1125	}
1126	else
1127	{
1128	xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1129	}
1130	}
1131
1132	for (int i = 0; i < depth; i++)
1133	sb.append(' ');
1134
1135	sb.append("</" + e.getNodeName() + ">");
1136	}
1137	}
1138	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: