Context Navigation

source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 29001

Last change on this file since 29001 was 29001, checked in by ak19, 10 years ago
Correction to recent commit in OpenCollectionDialog. And cosmetic changes to FormatConversionDialog and XMLTools
Property svn:keywords set to `Author Date Id Revision`
File size: 32.0 KB

Line
1	package org.greenstone.gatherer.util;
2
3	import java.io.*;
4	import java.net.*;
5	import java.util.*;
6	import org.apache.xerces.parsers.*;
7	import org.apache.xml.serialize.*;
8	import org.greenstone.gatherer.DebugStream;
9	import org.w3c.dom.*;
10	import org.xml.sax.*;
11
12	import java.io.FileReader;
13	import java.io.IOException;
14	import java.io.StringReader;
15
16	// SAX
17	import org.xml.sax.XMLReader;
18	import org.xml.sax.SAXException;
19	import org.xml.sax.SAXParseException;
20	import org.xml.sax.helpers.DefaultHandler;
21	import org.xml.sax.InputSource;
22
23	// JAXP
24	import javax.xml.parsers.DocumentBuilder;
25	import javax.xml.parsers.DocumentBuilderFactory;
26	import javax.xml.parsers.FactoryConfigurationError;
27	import javax.xml.parsers.ParserConfigurationException;
28	import javax.xml.parsers.SAXParser;
29	import javax.xml.parsers.SAXParserFactory;
30
31
32	/** This class is a static class containing useful XML functions */
33	public class XMLTools
34	{
35	/** extracts the text out of a node */
36	public static Node getNodeTextNode(Element param)
37	{
38	param.normalize();
39	Node n = param.getFirstChild();
40	while (n != null && n.getNodeType() != Node.TEXT_NODE)
41	{
42	n = n.getNextSibling();
43	}
44	return n;
45	}
46
47	/** extracts the text out of a node */
48	public static String getNodeText(Element param)
49	{
50	Node text_node = getNodeTextNode(param);
51	if (text_node == null)
52	{
53	return "";
54	}
55	return text_node.getNodeValue();
56	}
57
58	public static void setNodeText(Element elem, String text)
59	{
60	Node old_text_node = getNodeTextNode(elem);
61	if (old_text_node != null)
62	{
63	elem.removeChild(old_text_node);
64	}
65	Text t = elem.getOwnerDocument().createTextNode(text);
66	elem.appendChild(t);
67	}
68
69	/** returns the (first) child element with the given name */
70	public static Node getChildByTagName(Node n, String name)
71	{
72
73	Node child = n.getFirstChild();
74	while (child != null)
75	{
76	if (child.getNodeName().equals(name))
77	{
78	return child;
79	}
80	child = child.getNextSibling();
81	}
82	return null; //not found
83	}
84
85	/**
86	* returns the (nth) child element with the given name index numbers start
87	* at 0
88	*/
89	public static Node getChildByTagNameIndexed(Node n, String name, int index)
90	{
91	if (index == -1)
92	{
93	return getChildByTagName(n, name);
94	}
95	int count = 0;
96	Node child = n.getFirstChild();
97	while (child != null)
98	{
99	if (child.getNodeName().equals(name))
100	{
101	if (count == index)
102	{
103	return child;
104	}
105	else
106	{
107	count++;
108	}
109	}
110	child = child.getNextSibling();
111	}
112	return null; //not found
113	}
114
115	/**
116	* returns the element parent/node_name[@attribute_name='attribute_value']
117	*/
118	public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
119	{
120
121	NodeList children = parent.getChildNodes();
122	for (int i = 0; i < children.getLength(); i++)
123	{
124	Node child = children.item(i);
125	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
126	if (child.getNodeName().equals(node_name))
127	{
128	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
129	return (Element) child;
130	}
131	}
132	// not found
133	return null;
134	}
135
136	/**
137	* returns a list of elements
138	* parent/node_name[@attribute_name='attribute_value']
139	*/
140	public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
141	{
142	ArrayList elements = new ArrayList();
143	NodeList children = parent.getChildNodes();
144	for (int i = 0; i < children.getLength(); i++)
145	{
146	//System.out.println("getNamedElementList");
147	Node child = children.item(i);
148	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
149	if (child.getNodeName().equals(node_name))
150	{
151	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
152	elements.add((Element) child);
153	}
154	}
155	// not found
156	if (elements.size() == 0)
157	{
158	elements = null;
159	}
160	return elements;
161	}
162
163	public static void copyAllChildren(Element to, Element from)
164	{
165
166	Document to_doc = to.getOwnerDocument();
167	Node child = from.getFirstChild();
168	while (child != null)
169	{
170	to.appendChild(to_doc.importNode(child, true));
171	child = child.getNextSibling();
172	}
173	}
174
175	/** duplicates all elements in list elements and appends to toElement */
176	public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes) {
177	int num_elems = elements.getLength();
178	if (num_elems < 1)
179	{
180	return;
181	}
182	for (int i = 0; i < num_elems; i++)
183	{
184	Element to_element = XMLTools.duplicateElement(owner, (Element) elements.item(i), with_attributes);
185	toElement.appendChild(to_element);
186	}
187
188	}
189	/** Duplicates an element */
190	public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
191	{
192	return duplicateElementNS(owner, element, null, with_attributes);
193	}
194
195	/** Duplicates an element */
196	public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
197	{
198	Element duplicate;
199	if (namespace_uri == null)
200	{
201	duplicate = owner.createElement(element.getTagName());
202	}
203	else
204	{
205	duplicate = owner.createElementNS(namespace_uri, element.getTagName());
206	}
207	// Copy element attributes
208	if (with_attributes)
209	{
210	NamedNodeMap attributes = element.getAttributes();
211	for (int i = 0; i < attributes.getLength(); i++)
212	{
213	Node attribute = attributes.item(i);
214	duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
215	}
216	}
217
218	// Copy element children
219	NodeList children = element.getChildNodes();
220	for (int i = 0; i < children.getLength(); i++)
221	{
222	Node child = children.item(i);
223	duplicate.appendChild(owner.importNode(child, true));
224	}
225
226	return duplicate;
227	}
228
229	/** Remove all of the child nodes from a certain node. */
230	static final public void clear(Node node)
231	{
232	while (node.hasChildNodes())
233	{
234	node.removeChild(node.getFirstChild());
235	}
236	}
237
238	static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
239	{
240	ArrayList child_elements = new ArrayList();
241
242	NodeList children_nodelist = parent_element.getChildNodes();
243	for (int i = 0; i < children_nodelist.getLength(); i++)
244	{
245	Node child_node = children_nodelist.item(i);
246	if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
247	{
248	child_elements.add(child_node);
249	}
250	}
251
252	return child_elements;
253	}
254
255	static public String getElementTextValue(Element element)
256	{
257	// Find the first text node child
258	NodeList children_nodelist = element.getChildNodes();
259	for (int i = 0; i < children_nodelist.getLength(); i++)
260	{
261	Node child_node = children_nodelist.item(i);
262	if (child_node.getNodeType() == Node.TEXT_NODE)
263	{
264	return child_node.getNodeValue();
265	}
266	}
267
268	// None found
269	return "";
270	}
271
272	/**
273	* Method to retrieve the value of a given node.
274	*
275	* @param element
276	* The <strong>Element</strong> whose value we wish to find. Soon
277	* to be deprecated!
278	*/
279	static final public String getValue(Node element)
280	{
281	if (element == null)
282	{
283	return "";
284	}
285	// If we've been given a subject node first retrieve its value node.
286	if (element.getNodeName().equals("Subject"))
287	{
288	element = getNodeFromNamed(element, "Value");
289	}
290	// If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
291	if (element != null && element.hasChildNodes())
292	{
293	StringBuffer text_buffer = new StringBuffer();
294	NodeList text_nodes = element.getChildNodes();
295	for (int i = 0; i < text_nodes.getLength(); i++)
296	{
297	Node possible_text = text_nodes.item(i);
298	if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
299	{
300	text_buffer.append(possible_text.getNodeValue());
301	}
302	}
303	return text_buffer.toString();
304	}
305	return "";
306	}
307
308	/**
309	* Method to retrieve from the node given, a certain child node with the
310	* specified name.
311	*
312	* @param parent
313	* The <strong>Node</strong> whose children should be searched.
314	* @param name
315	* The required nodes name as a <strong>String</strong>.
316	* @return The requested <strong>Node</strong> if it is found, <i>null</i>
317	* otherwise. Soon to be deprecated!
318	*/
319	static final public Node getNodeFromNamed(Node parent, String name)
320	{
321	Node child = null;
322	for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
323	{
324	if (i.getNodeName().equals(name))
325	{
326	child = i;
327	}
328	}
329	return child;
330	}
331
332	static final public String WELLFORMED = "well-formed !";
333	static final public String NOTWELLFORMED = "not well-formed";
334	static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
335	static final private String FOOTER = "</collectionConfig>";
336
337
338	public static Document getDOM(String xml_str)
339	{
340	Document doc = null;
341	try {
342
343	DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
344	InputSource is = new InputSource();
345	is.setCharacterStream(new StringReader(xml_str));
346	doc = db.parse(is);
347
348	} catch (Exception e) {
349	e.printStackTrace();
350	}
351	return doc;
352	}
353
354	public static String parse(String xml_str)
355	{
356	String validation_msg = WELLFORMED;
357	xml_str = HEADER + xml_str + FOOTER;
358	try
359	{
360	SAXParserFactory factory = SAXParserFactory.newInstance();
361	factory.setNamespaceAware(true);
362	//factory.setValidating (true);
363	SAXParser parser = factory.newSAXParser();
364	InputSource iSource = new InputSource(new StringReader(xml_str));
365	// parser.parse (iSource, new DefaultHandler ());
366
367	org.xml.sax.XMLReader reader = parser.getXMLReader();
368	reader.setContentHandler(new DefaultHandler());
369	reader.setErrorHandler(new DefaultHandler());
370	reader.parse(iSource);
371	}
372	catch (FactoryConfigurationError e)
373	{
374	validation_msg = "unable to get a document builder factory";
375	}
376	catch (ParserConfigurationException e)
377	{
378	validation_msg = "unable to configure parser";
379	}
380	catch (SAXParseException e)
381	{
382	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
383	}
384	catch (SAXException e)
385	{
386	validation_msg += " Fatal error: " + e.toString();
387	}
388	catch (IOException e)
389	{
390	validation_msg = "Unable to read the input, i/o error";
391	}
392
393	return validation_msg;
394	}
395
396	//In this method, the parsed string xml_str is not wrapped by the header and footer strings.
397	public static String parseDOM(String xml_str)
398	{
399	String validation_msg = WELLFORMED;
400
401	try
402	{
403	SAXParserFactory factory = SAXParserFactory.newInstance();
404	factory.setNamespaceAware(true);
405	//factory.setValidating (true);
406	SAXParser parser = factory.newSAXParser();
407	InputSource iSource = new InputSource(new StringReader(xml_str));
408	// parser.parse (iSource, new DefaultHandler ());
409
410	org.xml.sax.XMLReader reader = parser.getXMLReader();
411	reader.setContentHandler(new DefaultHandler());
412	reader.setErrorHandler(new DefaultHandler());
413	reader.parse(iSource);
414	}
415	catch (FactoryConfigurationError e)
416	{
417	validation_msg = "unable to get a document builder factory";
418	}
419	catch (ParserConfigurationException e)
420	{
421	validation_msg = "unable to configure parser";
422	}
423	catch (SAXParseException e)
424	{
425	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
426	}
427	catch (SAXException e)
428	{
429	validation_msg += " " + e.toString();
430	}
431	catch (IOException e)
432	{
433	validation_msg = "Unable to read the input, i/o error";
434	}
435
436	return validation_msg;
437	}
438
439	public static String parse(File xml_file)
440	{
441	String validation_msg = WELLFORMED;
442
443	try
444	{
445	SAXParserFactory factory = SAXParserFactory.newInstance();
446	factory.setNamespaceAware(true);
447	//factory.setValidating (true);
448	SAXParser parser = factory.newSAXParser();
449	FileReader r = new FileReader(xml_file);
450	InputSource iSource = new InputSource(r);
451	XMLReader reader = parser.getXMLReader();
452	reader.setContentHandler(new DefaultHandler());
453	reader.setErrorHandler(new DefaultHandler());
454	reader.parse(iSource);
455	}
456	catch (FactoryConfigurationError e)
457	{
458	validation_msg = "unable to get a document builder factory";
459	}
460	catch (ParserConfigurationException e)
461	{
462	validation_msg = "unable to configure parser";
463	}
464	catch (SAXParseException e)
465	{
466	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
467	}
468	catch (SAXException e)
469	{
470	validation_msg += " Fatal error: " + e.toString();
471	}
472	catch (IOException e)
473	{
474	validation_msg = "Unable to read the input, i/o error";
475	}
476
477	return validation_msg;
478	}
479
480	/** Returns a string of the location. */
481	private static String getLocationString(SAXParseException ex)
482	{
483	StringBuffer str = new StringBuffer();
484
485	String systemId = ex.getSystemId();
486	if (systemId != null)
487	{
488	int index = systemId.lastIndexOf('/');
489	if (index != -1)
490	systemId = systemId.substring(index + 1);
491	str.append(systemId);
492	}
493	str.append("(line ");
494	str.append(ex.getLineNumber() - 1);
495	str.append(", column ");
496	str.append(ex.getColumnNumber());
497	str.append("): ");
498
499	return str.toString();
500
501	} // getLocationString(SAXParseException):String
502
503	/** Parse an XML document from a given file path */
504	static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
505	{
506	if (use_class_loader == true)
507	{
508	InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
509	if (is != null)
510	{
511	return parseXML(is);
512	}
513	}
514
515	// Try the file outside the classes directory
516	return parseXMLFile(new File(xml_file_path));
517	}
518
519	/** Parse an XML document from a given file */
520	static public Document parseXMLFile(File xml_file)
521	{
522	// No file? No point trying!
523	if (xml_file.exists() == false)
524	{
525	return null;
526	}
527
528	try
529	{
530	return parseXML(new FileInputStream(xml_file));
531	}
532	catch (Exception exception)
533	{
534	DebugStream.printStackTrace(exception);
535	return null;
536	}
537	}
538
539	/** Parse an XML document from a given input stream */
540	static public Document parseXML(InputStream xml_input_stream)
541	{
542	Document document = null;
543
544	try
545	{
546	InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
547	document = parseXML(isr);
548	isr.close();
549	xml_input_stream.close();
550	}
551	catch (Exception exception)
552	{
553	DebugStream.printStackTrace(exception);
554	}
555
556	return document;
557	}
558
559	/** Parse an XML document from a given reader */
560	static public Document parseXML(Reader xml_reader)
561	{
562	Document document = null;
563
564	// If debugging, the following will store the XML contents to be parsed,
565	// which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
566	String xmlContents = "";
567
568	try
569	{
570	Reader reader = null;
571
572	// (1) By default, GLI will remove any contents preceeding (and invalidating)
573	// the XML and present these lines separately to the user
574	if (!DebugStream.isDebuggingEnabled())
575	{
576	try
577	{
578	reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
579	}
580	catch (Exception e)
581	{
582	System.err.println("Exception while wrapping the reader in parseXML(Reader)");
583	e.printStackTrace();
584	}
585	}
586
587	// (2) If we are running GLI in debug mode:
588	// In case parsing exceptions are thrown (SAX Exceptions), we want to get some
589	// idea of where things went wrong. This will print the "XML" contents to either
590	// system.out (if debugging is off) or to the DebugStream otherwise.
591	// We need to read the XML twice to know the line where things went wrong, so
592	// do the additional reading only if we're debugging
593	else
594	{
595	StringBuffer buf = new StringBuffer();
596	char[] buffer = new char[500];
597	int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
598	while (numCharsRead != -1)
599	{
600	buf.append(buffer, 0, numCharsRead);
601	numCharsRead = xml_reader.read(buffer, 0, buffer.length);
602	}
603	xmlContents = buf.toString();
604	xml_reader.close(); // closing the old Reader
605	xml_reader = null;
606	buffer = null;
607	buf = null;
608	// we need a Reader to parse the same contents as the Reader that was just closed
609	reader = new BufferedReader(new StringReader(xmlContents));
610	//System.err.println("xmlContents:\n" + xmlContents);
611	}
612
613	// (2) The actual XML parsing
614	InputSource isc = new InputSource(reader);
615	DOMParser parser = new DOMParser();
616	parser.setFeature("http://xml.org/sax/features/validation", false);
617	parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
618	// May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
619	parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
620	parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
621	parser.parse(isc);
622	document = parser.getDocument();
623
624	}
625	catch (SAXParseException e)
626	{
627	showXMLParseFailureLine(e, xmlContents);
628	}
629	catch (SAXException exception)
630	{
631	System.err.println("SAX exception: " + exception.getMessage());
632	if (DebugStream.isDebuggingEnabled())
633	{
634	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
635	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
636	DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
637	System.exit(-1);
638	}
639	// else, not running in debug mode, so don't exit after exception
640	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
641	DebugStream.printStackTrace(exception);
642	}
643	catch (Exception exception)
644	{
645	DebugStream.printStackTrace(exception);
646	}
647
648	return document;
649	}
650
651	/**
652	* Displays the line (string) where the SAXParseException occurred, given a
653	* String of the entire xml that was being parsed and the SAXParseException
654	* object that was caught. The messages are printed to DebugStream, so run
655	* GLI/FLI with -debug to view this output.
656	*
657	* @param xmlContents
658	* is the entire xml that was being parsed when the exception
659	* occurred
660	* @param e
661	* is the SAXParseException object that was thrown upon parsing
662	* the xmlContents.
663	*/
664	public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
665	{
666
667	// There should be no characters at all that preceed the <?xml>... bit.
668	// The first check is for starting spaces:
669	if (xmlContents.startsWith("\n") \|\| xmlContents.startsWith(" ") \|\| xmlContents.startsWith("\t"))
670	{
671	DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
672	DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
673	return; // nothing more to do, first error identified
674	}
675
676	// the actual line (String literal) where parsing failed and the SAXParseException occurred.
677	String line = "";
678	int linenumber = e.getLineNumber();
679	DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
680	if (DebugStream.isDebuggingEnabled())
681	{
682	if (linenumber != -1)
683	{
684	String[] lines = xmlContents.split("\n");
685	if (lines.length > 0)
686	{
687	DebugStream.println(" (number of lines: " + lines.length + ")");
688	if (lines.length >= linenumber)
689	{
690	line = lines[linenumber - 1];
691	}
692	else
693	{ // error is past the last line
694	line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
695	}
696	}
697	else
698	{
699	DebugStream.print("\n");
700	}
701	lines = null;
702
703	DebugStream.println("The parsing error occurred on this line:\n*********START\n" + line + "\n*********END");
704	DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
705
706	// Uncomment if you want to print out the entire contents of the XML doc:
707	//DebugStream.println("\n\nThis was the XML:\n*********START\n"
708	// + xmlContents + "\n************END\n");
709	}
710	else
711	{ // no particular line number, print out all the xml so debugger can inspect it
712	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
713	}
714	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
715	DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
716	System.exit(-1);
717	}
718	else
719	{ // not running in debug mode
720	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
721	}
722	}
723
724	static public StringBuffer readXMLStream(InputStream input_stream)
725	{
726	StringBuffer xml = new StringBuffer("");
727
728	try
729	{
730	InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
731	BufferedReader buffered_in = new BufferedReader(isr);
732
733	String line = "";
734	boolean xml_content = false;
735	while ((line = buffered_in.readLine()) != null)
736	{
737	if (xml_content)
738	{
739	xml.append(line);
740	xml.append("\n");
741	}
742	else if (line.trim().startsWith("<?xml"))
743	{
744	xml_content = true;
745	xml.append(line);
746	xml.append("\n");
747	}
748	else
749	{
750	System.err.println(line);
751	}
752	}
753	buffered_in = null;
754	}
755	catch (Exception error)
756	{
757	System.err.println("Failed when trying to parse XML stream");
758	error.printStackTrace();
759	}
760
761	return xml;
762	}
763
764	/**
765	* Removes characters that are invalid in XML (see
766	* http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
767	*/
768	static public String removeInvalidCharacters(String text)
769	{
770	char[] safe_characters = new char[text.length()];
771	int j = 0;
772
773	char[] raw_characters = new char[text.length()];
774	text.getChars(0, text.length(), raw_characters, 0);
775	for (int i = 0; i < raw_characters.length; i++)
776	{
777	char character = raw_characters[i];
778	if ((character >= 0x20 && character <= 0xD7FF) \|\| character == 0x09 \|\| character == 0x0A \|\| character == 0x0D \|\| (character >= 0xE000 && character <= 0xFFFD) \|\| (character >= 0x10000 && character <= 0x10FFFF))
779	{
780	safe_characters[j] = character;
781	j++;
782	}
783	}
784
785	return new String(safe_characters, 0, j);
786	}
787
788	static public void setElementTextValue(Element element, String text)
789	{
790	// Remove all text node children
791	NodeList children_nodelist = element.getChildNodes();
792	for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
793	{
794	Node child_node = children_nodelist.item(i);
795	if (child_node.getNodeType() == Node.TEXT_NODE)
796	{
797	element.removeChild(child_node);
798	}
799	}
800
801	// Add a new text node
802	if (text != null)
803	{
804	element.appendChild(element.getOwnerDocument().createTextNode(text));
805	}
806	}
807
808	/**
809	* Set the #text node value of some element.
810	*
811	* @param element
812	* the Element whose value we wish to set
813	* @param value
814	* the new value for the element as a String Soon to be
815	* deprecated!
816	*/
817	static final public void setValue(Element element, String value)
818	{
819	// Remove any existing child node(s)
820	clear(element);
821	// Add new text node.
822	if (value != null)
823	{
824	element.appendChild(element.getOwnerDocument().createTextNode(value));
825	}
826	}
827
828	static public void indentXML(Element elem, int depth)
829	{
830	Document doc = elem.getOwnerDocument();
831
832	String startIndentString = "\n";
833	for (int i = 0; i < depth; i++)
834	{
835	startIndentString += "\t";
836	}
837	Node startTextNode = doc.createTextNode(startIndentString);
838
839	String endIndentString = "\n";
840	for (int i = 0; i < depth - 1; i++)
841	{
842	endIndentString += "\t";
843	}
844	Node endTextNode = doc.createTextNode(endIndentString);
845
846	boolean found = false;
847	Node child = elem.getFirstChild();
848	while (child != null)
849	{
850	// first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
851	if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
852	{
853	Node spaceTextNode = child;
854	child = child.getNextSibling();
855	elem.removeChild(spaceTextNode);
856
857	if(child == null) break;
858	}
859
860	// now process normal element nodes as intended
861	if (child.getNodeType() == Node.ELEMENT_NODE)
862	{
863	found = true;
864	break;
865	}
866	child = child.getNextSibling();
867	}
868
869	if (found)
870	{
871	elem.appendChild(endTextNode);
872	}
873
874	child = elem.getFirstChild();
875	while (child != null)
876	{
877	// Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
878	// because the first while loop above would break out when it found an element node and wouldn't have got rid
879	// of all the empty text nodes yet.
880	// This time, beware not to delete the special end and start empty textnodes just added, since
881	// they've been created and inserted specifically.
882	if(child != endTextNode && child != startTextNode
883	&& child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
884	{
885	Node spaceTextNode = child;
886	child = child.getNextSibling();
887	elem.removeChild(spaceTextNode);
888
889	if(child == null) break;
890	}
891
892	// go back to processing normal element nodes as intended
893	if (child.getNodeType() == Node.ELEMENT_NODE)
894	{
895	elem.insertBefore(startTextNode.cloneNode(false), child);
896	indentXML((Element) child, depth + 1);
897	}
898	child = child.getNextSibling();
899	}
900	}
901
902	/**
903	* Write an XML document to a given file with the text node of the specified
904	* element unescaped
905	*/
906	static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
907	{
908	indentXML(document.getDocumentElement(), 1);
909	try
910	{
911	OutputStream os = new FileOutputStream(xml_file);
912	// Create an output format for our document.
913	OutputFormat f = new OutputFormat(document);
914	f.setEncoding("UTF-8");
915	f.setIndenting(true);
916	f.setLineWidth(0); // Why isn't this working!
917	f.setPreserveSpace(true);
918	if (nonEscapingTagNames != null)
919	{
920	f.setNonEscapingElements(nonEscapingTagNames);
921	}
922	// Create the necessary writer stream for serialization.
923	OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
924	Writer w = new BufferedWriter(osw);
925	// Generate a new serializer from the above.
926	XMLSerializer s = new XMLSerializer(w, f);
927	s.asDOMSerializer();
928	// Finally serialize the document to file.
929	s.serialize(document);
930	// And close.
931	os.close();
932	}
933	catch (Exception exception)
934	{
935	DebugStream.printStackTrace(exception);
936	}
937	}
938
939	/** Write an XML document to a given file */
940	static public void writeXMLFile(File xml_file, Document document)
941	{
942	writeXMLFile(xml_file, document, null);
943	}
944
945	public static void printXMLNode(Node e)
946	{
947	printXMLNode(e, 0);
948	}
949
950	public static void printXMLNode(Node e, int depth)
951	{ //recursive method call using DOM API...
952
953	for (int i = 0; i < depth; i++)
954	System.out.print(' ');
955
956	if (e.getNodeType() == Node.TEXT_NODE)
957	{
958	//System.out.println("text") ;
959	if (e.getNodeValue() != "")
960	{
961	System.out.println(e.getNodeValue());
962	}
963	return;
964	}
965
966	System.out.print('<');
967	System.out.print(e.getNodeName());
968	NamedNodeMap attrs = e.getAttributes();
969	if (attrs != null)
970	{
971	for (int i = 0; i < attrs.getLength(); i++)
972	{
973	Node attr = attrs.item(i);
974	System.out.print(' ');
975	System.out.print(attr.getNodeName());
976	System.out.print("=\"");
977	System.out.print(attr.getNodeValue());
978	System.out.print('"');
979	}
980	}
981	NodeList children = e.getChildNodes();
982
983	if (children == null \|\| children.getLength() == 0)
984	System.out.println("/>");
985	else
986	{
987
988	System.out.println('>');
989
990	int len = children.getLength();
991	for (int i = 0; i < len; i++)
992	{
993	printXMLNode(children.item(i), depth + 1);
994	}
995
996	for (int i = 0; i < depth; i++)
997	System.out.print(' ');
998
999	System.out.println("</" + e.getNodeName() + ">");
1000	}
1001
1002	}
1003
1004	public static String xmlNodeToString(Node e)
1005	{
1006	StringBuffer sb = new StringBuffer("");
1007	xmlNodeToString(sb, e, true, "\t", 2);
1008	return sb.toString();
1009	}
1010
1011	public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
1012	{
1013
1014	if (e.getNodeType() == Node.CDATA_SECTION_NODE)
1015	{
1016	if (e.getNodeValue() != "")
1017	{
1018	String text = e.getNodeValue();
1019	sb.append("<![CDATA[");
1020	sb.append(text);
1021	sb.append("]]>");
1022	}
1023	return;
1024	}
1025
1026	if (e.getNodeType() == Node.TEXT_NODE)
1027	{
1028	if (e.getNodeValue() != "")
1029	{
1030	String text = e.getNodeValue();
1031	text = text.replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">").replaceAll("[\\n\\r\\t\\s]*$", "");
1032	for (Character c : text.toCharArray())
1033	{
1034	if (c.equals('\n'))
1035	{
1036	text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
1037	break;
1038	}
1039
1040	if (!Character.isWhitespace(c))
1041	{
1042	break;
1043	}
1044	}
1045	sb.append(text);
1046	}
1047	return;
1048	}
1049
1050	if (e.getNodeType() == Node.COMMENT_NODE)
1051	{
1052	if (e.getNodeValue() != "")
1053	{
1054	sb.append("<!--\n" + e.getNodeValue().trim() + "\n-->\n");
1055	}
1056	return;
1057	}
1058
1059	if (indent)
1060	{
1061	for (int i = 0; i < depth; i++)
1062	{
1063	sb.append(indentString);
1064	}
1065	}
1066
1067	sb.append('<');
1068	sb.append(e.getNodeName());
1069	NamedNodeMap attrs = e.getAttributes();
1070	if (attrs != null)
1071	{
1072	for (int i = 0; i < attrs.getLength(); i++)
1073	{
1074	Node attr = attrs.item(i);
1075	sb.append(' ');
1076	sb.append(attr.getNodeName());
1077	sb.append("=\"");
1078	sb.append(attr.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">"));
1079	sb.append('"');
1080	}
1081	}
1082	NodeList children = e.getChildNodes();
1083
1084	boolean hasElements = false;
1085	boolean indentSwapped = false;
1086	for (int i = 0; i < children.getLength(); i++)
1087	{
1088	if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1089	{
1090	hasElements = true;
1091	}
1092	if ((children.item(i).getNodeType() == Node.TEXT_NODE \|\| children.item(i).getNodeType() == Node.CDATA_SECTION_NODE) && indent)
1093	{
1094	if (children.item(i).getNodeValue().trim().length() > 0)
1095	{
1096	indentSwapped = true;
1097	indent = false;
1098	}
1099	}
1100	}
1101
1102	if (children == null \|\| children.getLength() == 0)
1103	{
1104	sb.append("/>");
1105
1106	if (indent)
1107	{
1108	sb.append("\n");
1109	}
1110	}
1111	else
1112	{
1113	sb.append(">");
1114	if (hasElements && indent)
1115	{
1116	sb.append("\n");
1117	}
1118
1119	int len = children.getLength();
1120	for (int i = 0; i < len; i++)
1121	{
1122	xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1123	}
1124
1125	if (indent)
1126	{
1127	for (int i = 0; i < depth; i++)
1128	{
1129	sb.append(indentString);
1130	}
1131	}
1132
1133	sb.append("</" + e.getNodeName() + ">");
1134
1135	if ((hasElements && indent) \|\| indentSwapped)
1136	{
1137	sb.append("\n");
1138	}
1139	}
1140	}
1141
1142	public static String xmlNodeToStringWithoutIndenting(Node e)
1143	{
1144	StringBuffer sb = new StringBuffer("");
1145	xmlNodeToStringWithoutNewline(sb, e, -1);
1146	return sb.toString();
1147	}
1148
1149	public static String xmlNodeToStringWithoutNewline(Node e)
1150	{
1151	StringBuffer sb = new StringBuffer("");
1152	xmlNodeToStringWithoutNewline(sb, e, 0);
1153	return sb.toString();
1154	}
1155
1156	private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1157	{
1158
1159	for (int i = 0; i < depth; i++)
1160	{
1161	sb.append(' ');
1162	}
1163
1164	if (e.getNodeType() == Node.TEXT_NODE)
1165	{
1166	if (e.getNodeValue() != "")
1167	{
1168	sb.append(e.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replace(">", ">"));
1169	}
1170	return;
1171	}
1172
1173	if (e.getNodeType() == Node.COMMENT_NODE)
1174	{
1175	if (e.getNodeValue() != "")
1176	{
1177	sb.append("<!--" + e.getNodeValue() + "-->");
1178	}
1179	return;
1180	}
1181
1182	sb.append('<');
1183	sb.append(e.getNodeName());
1184	NamedNodeMap attrs = e.getAttributes();
1185	if (attrs != null)
1186	{
1187	for (int i = 0; i < attrs.getLength(); i++)
1188	{
1189	Node attr = attrs.item(i);
1190	sb.append(' ');
1191	sb.append(attr.getNodeName());
1192	sb.append("=\"");
1193	sb.append(attr.getNodeValue());
1194	sb.append('"');
1195	}
1196	}
1197	NodeList children = e.getChildNodes();
1198
1199	if (children == null \|\| children.getLength() == 0)
1200	sb.append("/>");
1201	else
1202	{
1203
1204	sb.append(">");
1205
1206	int len = children.getLength();
1207	for (int i = 0; i < len; i++)
1208	{
1209	if (depth >= 0)
1210	{
1211	xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1212	}
1213	else
1214	{
1215	xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1216	}
1217	}
1218
1219	for (int i = 0; i < depth; i++)
1220	sb.append(' ');
1221
1222	sb.append("</" + e.getNodeName() + ">");
1223	}
1224	}
1225	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: