Context Navigation

source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 30678

Last change on this file since 30678 was 29730, checked in by ak19, 9 years ago
The second and final part of the commits to getting GLI running again and parsing web.xml, after the changes to commit r29687, where web.xml was split into two and included server.xml. In this commit: 1. GLI uses an EntityResolver to resolve entities in web.xml that are defined in the included servlets.xml file. In order to keep XMLTools.java tidy and hopefully make the GLI entity resolver more reusable, the new GLIEntityResolver.java class checks default search paths first when asked to resolve entities. web/WEB-INF, where web.xml and servlets.xml live, has been added to the default search paths, as also the gli user dir where the web.xml and server.xml will be in a client-gli situation. 2. Small tidy up to Greenstone runtime's GSEntityResolver. 3. Remote Greenstone gliserver.pl needs to also transfer the new server.xml file when zipping up web.xml. 4. Minor touchups to the new README on apache.jar.
Property svn:keywords set to `Author Date Id Revision`
File size: 32.0 KB

Line
1	package org.greenstone.gatherer.util;
2
3	import java.io.*;
4	import java.net.*;
5	import java.util.*;
6	import org.apache.xerces.parsers.*;
7	import org.apache.xml.serialize.*;
8	import org.greenstone.gatherer.DebugStream;
9	import org.w3c.dom.*;
10	import org.xml.sax.*;
11
12	import java.io.FileReader;
13	import java.io.IOException;
14	import java.io.StringReader;
15
16	// SAX
17	import org.xml.sax.XMLReader;
18	import org.xml.sax.SAXException;
19	import org.xml.sax.SAXParseException;
20	import org.xml.sax.helpers.DefaultHandler;
21	import org.xml.sax.InputSource;
22
23	// JAXP
24	import javax.xml.parsers.DocumentBuilder;
25	import javax.xml.parsers.DocumentBuilderFactory;
26	import javax.xml.parsers.FactoryConfigurationError;
27	import javax.xml.parsers.ParserConfigurationException;
28	import javax.xml.parsers.SAXParser;
29	import javax.xml.parsers.SAXParserFactory;
30
31
32	/** This class is a static class containing useful XML functions */
33	public class XMLTools
34	{
35	/** extracts the text out of a node */
36	public static Node getNodeTextNode(Element param)
37	{
38	param.normalize();
39	Node n = param.getFirstChild();
40	while (n != null && n.getNodeType() != Node.TEXT_NODE)
41	{
42	n = n.getNextSibling();
43	}
44	return n;
45	}
46
47	/** extracts the text out of a node */
48	public static String getNodeText(Element param)
49	{
50	Node text_node = getNodeTextNode(param);
51	if (text_node == null)
52	{
53	return "";
54	}
55	return text_node.getNodeValue();
56	}
57
58	public static void setNodeText(Element elem, String text)
59	{
60	Node old_text_node = getNodeTextNode(elem);
61	if (old_text_node != null)
62	{
63	elem.removeChild(old_text_node);
64	}
65	Text t = elem.getOwnerDocument().createTextNode(text);
66	elem.appendChild(t);
67	}
68
69	/** returns the (first) child element with the given name */
70	public static Node getChildByTagName(Node n, String name)
71	{
72
73	Node child = n.getFirstChild();
74	while (child != null)
75	{
76	if (child.getNodeName().equals(name))
77	{
78	return child;
79	}
80	child = child.getNextSibling();
81	}
82	return null; //not found
83	}
84
85	/**
86	* returns the (nth) child element with the given name index numbers start
87	* at 0
88	*/
89	public static Node getChildByTagNameIndexed(Node n, String name, int index)
90	{
91	if (index == -1)
92	{
93	return getChildByTagName(n, name);
94	}
95	int count = 0;
96	Node child = n.getFirstChild();
97	while (child != null)
98	{
99	if (child.getNodeName().equals(name))
100	{
101	if (count == index)
102	{
103	return child;
104	}
105	else
106	{
107	count++;
108	}
109	}
110	child = child.getNextSibling();
111	}
112	return null; //not found
113	}
114
115	/**
116	* returns the element parent/node_name[@attribute_name='attribute_value']
117	*/
118	public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
119	{
120
121	NodeList children = parent.getChildNodes();
122	for (int i = 0; i < children.getLength(); i++)
123	{
124	Node child = children.item(i);
125	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
126	if (child.getNodeName().equals(node_name))
127	{
128	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
129	return (Element) child;
130	}
131	}
132	// not found
133	return null;
134	}
135
136	/**
137	* returns a list of elements
138	* parent/node_name[@attribute_name='attribute_value']
139	*/
140	public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
141	{
142	ArrayList elements = new ArrayList();
143	NodeList children = parent.getChildNodes();
144	for (int i = 0; i < children.getLength(); i++)
145	{
146	//System.out.println("getNamedElementList");
147	Node child = children.item(i);
148	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
149	if (child.getNodeName().equals(node_name))
150	{
151	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
152	elements.add((Element) child);
153	}
154	}
155	// not found
156	if (elements.size() == 0)
157	{
158	elements = null;
159	}
160	return elements;
161	}
162
163	public static void copyAllChildren(Element to, Element from)
164	{
165
166	Document to_doc = to.getOwnerDocument();
167	Node child = from.getFirstChild();
168	while (child != null)
169	{
170	to.appendChild(to_doc.importNode(child, true));
171	child = child.getNextSibling();
172	}
173	}
174
175	/** duplicates all elements in list elements and appends to toElement */
176	public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes) {
177	int num_elems = elements.getLength();
178	if (num_elems < 1)
179	{
180	return;
181	}
182	for (int i = 0; i < num_elems; i++)
183	{
184	Element to_element = XMLTools.duplicateElement(owner, (Element) elements.item(i), with_attributes);
185	toElement.appendChild(to_element);
186	}
187
188	}
189	/** Duplicates an element */
190	public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
191	{
192	return duplicateElementNS(owner, element, null, with_attributes);
193	}
194
195	/** Duplicates an element */
196	public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
197	{
198	Element duplicate;
199	if (namespace_uri == null)
200	{
201	duplicate = owner.createElement(element.getTagName());
202	}
203	else
204	{
205	duplicate = owner.createElementNS(namespace_uri, element.getTagName());
206	}
207	// Copy element attributes
208	if (with_attributes)
209	{
210	NamedNodeMap attributes = element.getAttributes();
211	for (int i = 0; i < attributes.getLength(); i++)
212	{
213	Node attribute = attributes.item(i);
214	duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
215	}
216	}
217
218	// Copy element children
219	NodeList children = element.getChildNodes();
220	for (int i = 0; i < children.getLength(); i++)
221	{
222	Node child = children.item(i);
223	duplicate.appendChild(owner.importNode(child, true));
224	}
225
226	return duplicate;
227	}
228
229	/** Remove all of the child nodes from a certain node. */
230	static final public void clear(Node node)
231	{
232	while (node.hasChildNodes())
233	{
234	node.removeChild(node.getFirstChild());
235	}
236	}
237
238	static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
239	{
240	ArrayList child_elements = new ArrayList();
241
242	NodeList children_nodelist = parent_element.getChildNodes();
243	for (int i = 0; i < children_nodelist.getLength(); i++)
244	{
245	Node child_node = children_nodelist.item(i);
246	if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
247	{
248	child_elements.add(child_node);
249	}
250	}
251
252	return child_elements;
253	}
254
255	static public String getElementTextValue(Element element)
256	{
257	// Find the first text node child
258	NodeList children_nodelist = element.getChildNodes();
259	for (int i = 0; i < children_nodelist.getLength(); i++)
260	{
261	Node child_node = children_nodelist.item(i);
262	if (child_node.getNodeType() == Node.TEXT_NODE)
263	{
264	return child_node.getNodeValue();
265	}
266	}
267
268	// None found
269	return "";
270	}
271
272	/**
273	* Method to retrieve the value of a given node.
274	*
275	* @param element
276	* The <strong>Element</strong> whose value we wish to find. Soon
277	* to be deprecated!
278	*/
279	static final public String getValue(Node element)
280	{
281	if (element == null)
282	{
283	return "";
284	}
285	// If we've been given a subject node first retrieve its value node.
286	if (element.getNodeName().equals("Subject"))
287	{
288	element = getNodeFromNamed(element, "Value");
289	}
290	// If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
291	if (element != null && element.hasChildNodes())
292	{
293	StringBuffer text_buffer = new StringBuffer();
294	NodeList text_nodes = element.getChildNodes();
295	for (int i = 0; i < text_nodes.getLength(); i++)
296	{
297	Node possible_text = text_nodes.item(i);
298	if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
299	{
300	text_buffer.append(possible_text.getNodeValue());
301	}
302	}
303	return text_buffer.toString();
304	}
305	return "";
306	}
307
308	/**
309	* Method to retrieve from the node given, a certain child node with the
310	* specified name.
311	*
312	* @param parent
313	* The <strong>Node</strong> whose children should be searched.
314	* @param name
315	* The required nodes name as a <strong>String</strong>.
316	* @return The requested <strong>Node</strong> if it is found, <i>null</i>
317	* otherwise. Soon to be deprecated!
318	*/
319	static final public Node getNodeFromNamed(Node parent, String name)
320	{
321	Node child = null;
322	for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
323	{
324	if (i.getNodeName().equals(name))
325	{
326	child = i;
327	}
328	}
329	return child;
330	}
331
332	static final public String WELLFORMED = "well-formed !";
333	static final public String NOTWELLFORMED = "not well-formed";
334	static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
335	static final private String FOOTER = "</collectionConfig>";
336
337
338	public static Document getDOM(String xml_str)
339	{
340	Document doc = null;
341	try {
342
343	DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
344	InputSource is = new InputSource();
345	is.setCharacterStream(new StringReader(xml_str));
346	doc = db.parse(is);
347
348	} catch (Exception e) {
349	e.printStackTrace();
350	}
351	return doc;
352	}
353
354	public static String parse(String xml_str)
355	{
356	String validation_msg = WELLFORMED;
357	xml_str = HEADER + xml_str + FOOTER;
358	try
359	{
360	SAXParserFactory factory = SAXParserFactory.newInstance();
361	factory.setNamespaceAware(true);
362	//factory.setValidating (true);
363	SAXParser parser = factory.newSAXParser();
364	InputSource iSource = new InputSource(new StringReader(xml_str));
365	// parser.parse (iSource, new DefaultHandler ());
366
367	org.xml.sax.XMLReader reader = parser.getXMLReader();
368	reader.setContentHandler(new DefaultHandler());
369	reader.setErrorHandler(new DefaultHandler());
370	reader.parse(iSource);
371	}
372	catch (FactoryConfigurationError e)
373	{
374	validation_msg = "unable to get a document builder factory";
375	}
376	catch (ParserConfigurationException e)
377	{
378	validation_msg = "unable to configure parser";
379	}
380	catch (SAXParseException e)
381	{
382	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
383	}
384	catch (SAXException e)
385	{
386	validation_msg += " Fatal error: " + e.toString();
387	}
388	catch (IOException e)
389	{
390	validation_msg = "Unable to read the input, i/o error";
391	}
392
393	return validation_msg;
394	}
395
396	//In this method, the parsed string xml_str is not wrapped by the header and footer strings.
397	public static String parseDOM(String xml_str)
398	{
399	String validation_msg = WELLFORMED;
400
401	try
402	{
403	SAXParserFactory factory = SAXParserFactory.newInstance();
404	factory.setNamespaceAware(true);
405	//factory.setValidating (true);
406	SAXParser parser = factory.newSAXParser();
407	InputSource iSource = new InputSource(new StringReader(xml_str));
408	// parser.parse (iSource, new DefaultHandler ());
409
410	org.xml.sax.XMLReader reader = parser.getXMLReader();
411	reader.setContentHandler(new DefaultHandler());
412	reader.setErrorHandler(new DefaultHandler());
413	reader.parse(iSource);
414	}
415	catch (FactoryConfigurationError e)
416	{
417	validation_msg = "unable to get a document builder factory";
418	}
419	catch (ParserConfigurationException e)
420	{
421	validation_msg = "unable to configure parser";
422	}
423	catch (SAXParseException e)
424	{
425	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
426	}
427	catch (SAXException e)
428	{
429	validation_msg += " " + e.toString();
430	}
431	catch (IOException e)
432	{
433	validation_msg = "Unable to read the input, i/o error";
434	}
435
436	return validation_msg;
437	}
438
439	public static String parse(File xml_file)
440	{
441	String validation_msg = WELLFORMED;
442
443	try
444	{
445	SAXParserFactory factory = SAXParserFactory.newInstance();
446	factory.setNamespaceAware(true);
447	//factory.setValidating (true);
448	SAXParser parser = factory.newSAXParser();
449	FileReader r = new FileReader(xml_file);
450	InputSource iSource = new InputSource(r);
451	XMLReader reader = parser.getXMLReader();
452	reader.setContentHandler(new DefaultHandler());
453	reader.setErrorHandler(new DefaultHandler());
454	reader.parse(iSource);
455	}
456	catch (FactoryConfigurationError e)
457	{
458	validation_msg = "unable to get a document builder factory";
459	}
460	catch (ParserConfigurationException e)
461	{
462	validation_msg = "unable to configure parser";
463	}
464	catch (SAXParseException e)
465	{
466	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
467	}
468	catch (SAXException e)
469	{
470	validation_msg += " Fatal error: " + e.toString();
471	}
472	catch (IOException e)
473	{
474	validation_msg = "Unable to read the input, i/o error";
475	}
476
477	return validation_msg;
478	}
479
480	/** Returns a string of the location. */
481	private static String getLocationString(SAXParseException ex)
482	{
483	StringBuffer str = new StringBuffer();
484
485	String systemId = ex.getSystemId();
486	if (systemId != null)
487	{
488	int index = systemId.lastIndexOf('/');
489	if (index != -1)
490	systemId = systemId.substring(index + 1);
491	str.append(systemId);
492	}
493	str.append("(line ");
494	str.append(ex.getLineNumber() - 1);
495	str.append(", column ");
496	str.append(ex.getColumnNumber());
497	str.append("): ");
498
499	return str.toString();
500
501	} // getLocationString(SAXParseException):String
502
503	/** Parse an XML document from a given file path */
504	static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
505	{
506	if (use_class_loader == true)
507	{
508	InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
509	if (is != null)
510	{
511	return parseXML(is);
512	}
513	}
514
515	// Try the file outside the classes directory
516	return parseXMLFile(new File(xml_file_path));
517	}
518
519	/** Parse an XML document from a given file */
520	static public Document parseXMLFile(File xml_file)
521	{
522	// No file? No point trying!
523	if (xml_file.exists() == false)
524	{
525	return null;
526	}
527
528	try
529	{
530	return parseXML(new FileInputStream(xml_file));
531	}
532	catch (Exception exception)
533	{
534	DebugStream.printStackTrace(exception);
535	return null;
536	}
537	}
538
539	/** Parse an XML document from a given input stream */
540	static public Document parseXML(InputStream xml_input_stream)
541	{
542	Document document = null;
543
544	try
545	{
546	InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
547	document = parseXML(isr);
548	isr.close();
549	xml_input_stream.close();
550	}
551	catch (Exception exception)
552	{
553	DebugStream.printStackTrace(exception);
554	}
555
556	return document;
557	}
558
559	/** Parse an XML document from a given reader */
560	static public Document parseXML(Reader xml_reader)
561	{
562	Document document = null;
563
564	// If debugging, the following will store the XML contents to be parsed,
565	// which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
566	String xmlContents = "";
567
568	try
569	{
570	Reader reader = null;
571
572	// (1) By default, GLI will remove any contents preceeding (and invalidating)
573	// the XML and present these lines separately to the user
574	if (!DebugStream.isDebuggingEnabled())
575	{
576	try
577	{
578	reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
579	}
580	catch (Exception e)
581	{
582	System.err.println("Exception while wrapping the reader in parseXML(Reader)");
583	e.printStackTrace();
584	}
585	}
586
587	// (2) If we are running GLI in debug mode:
588	// In case parsing exceptions are thrown (SAX Exceptions), we want to get some
589	// idea of where things went wrong. This will print the "XML" contents to either
590	// system.out (if debugging is off) or to the DebugStream otherwise.
591	// We need to read the XML twice to know the line where things went wrong, so
592	// do the additional reading only if we're debugging
593	else
594	{
595	StringBuffer buf = new StringBuffer();
596	char[] buffer = new char[500];
597	int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
598	while (numCharsRead != -1)
599	{
600	buf.append(buffer, 0, numCharsRead);
601	numCharsRead = xml_reader.read(buffer, 0, buffer.length);
602	}
603	xmlContents = buf.toString();
604	xml_reader.close(); // closing the old Reader
605	xml_reader = null;
606	buffer = null;
607	buf = null;
608	// we need a Reader to parse the same contents as the Reader that was just closed
609	reader = new BufferedReader(new StringReader(xmlContents));
610	//System.err.println("xmlContents:\n" + xmlContents);
611	}
612
613	// (2) The actual XML parsing
614	InputSource isc = new InputSource(reader);
615	DOMParser parser = new DOMParser();
616	parser.setFeature("http://xml.org/sax/features/validation", false);
617	parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
618	// May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
619	parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
620	parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
621	parser.setEntityResolver(new GLIEntityResolver());
622	parser.parse(isc);
623	document = parser.getDocument();
624
625	}
626	catch (SAXParseException e)
627	{
628	showXMLParseFailureLine(e, xmlContents);
629	}
630	catch (SAXException exception)
631	{
632	System.err.println("SAX exception: " + exception.getMessage());
633	if (DebugStream.isDebuggingEnabled())
634	{
635	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
636	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
637	DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
638	System.exit(-1);
639	}
640	// else, not running in debug mode, so don't exit after exception
641	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
642	DebugStream.printStackTrace(exception);
643	}
644	catch (Exception exception)
645	{
646	DebugStream.printStackTrace(exception);
647	}
648
649	return document;
650	}
651
652	/**
653	* Displays the line (string) where the SAXParseException occurred, given a
654	* String of the entire xml that was being parsed and the SAXParseException
655	* object that was caught. The messages are printed to DebugStream, so run
656	* GLI/FLI with -debug to view this output.
657	*
658	* @param xmlContents
659	* is the entire xml that was being parsed when the exception
660	* occurred
661	* @param e
662	* is the SAXParseException object that was thrown upon parsing
663	* the xmlContents.
664	*/
665	public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
666	{
667
668	// There should be no characters at all that preceed the <?xml>... bit.
669	// The first check is for starting spaces:
670	if (xmlContents.startsWith("\n") \|\| xmlContents.startsWith(" ") \|\| xmlContents.startsWith("\t"))
671	{
672	DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
673	DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
674	return; // nothing more to do, first error identified
675	}
676
677	// the actual line (String literal) where parsing failed and the SAXParseException occurred.
678	String line = "";
679	int linenumber = e.getLineNumber();
680	DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
681	if (DebugStream.isDebuggingEnabled())
682	{
683	if (linenumber != -1)
684	{
685	String[] lines = xmlContents.split("\n");
686	if (lines.length > 0)
687	{
688	DebugStream.println(" (number of lines: " + lines.length + ")");
689	if (lines.length >= linenumber)
690	{
691	line = lines[linenumber - 1];
692	}
693	else
694	{ // error is past the last line
695	line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
696	}
697	}
698	else
699	{
700	DebugStream.print("\n");
701	}
702	lines = null;
703
704	DebugStream.println("The parsing error occurred on this line:\n*********START\n" + line + "\n*********END");
705	DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
706
707	// Uncomment if you want to print out the entire contents of the XML doc:
708	//DebugStream.println("\n\nThis was the XML:\n*********START\n"
709	// + xmlContents + "\n************END\n");
710	}
711	else
712	{ // no particular line number, print out all the xml so debugger can inspect it
713	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
714	}
715	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
716	DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
717	System.exit(-1);
718	}
719	else
720	{ // not running in debug mode
721	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
722	}
723	}
724
725	static public StringBuffer readXMLStream(InputStream input_stream)
726	{
727	StringBuffer xml = new StringBuffer("");
728
729	try
730	{
731	InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
732	BufferedReader buffered_in = new BufferedReader(isr);
733
734	String line = "";
735	boolean xml_content = false;
736	while ((line = buffered_in.readLine()) != null)
737	{
738	if (xml_content)
739	{
740	xml.append(line);
741	xml.append("\n");
742	}
743	else if (line.trim().startsWith("<?xml"))
744	{
745	xml_content = true;
746	xml.append(line);
747	xml.append("\n");
748	}
749	else
750	{
751	System.err.println(line);
752	}
753	}
754	buffered_in = null;
755	}
756	catch (Exception error)
757	{
758	System.err.println("Failed when trying to parse XML stream");
759	error.printStackTrace();
760	}
761
762	return xml;
763	}
764
765	/**
766	* Removes characters that are invalid in XML (see
767	* http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
768	*/
769	static public String removeInvalidCharacters(String text)
770	{
771	char[] safe_characters = new char[text.length()];
772	int j = 0;
773
774	char[] raw_characters = new char[text.length()];
775	text.getChars(0, text.length(), raw_characters, 0);
776	for (int i = 0; i < raw_characters.length; i++)
777	{
778	char character = raw_characters[i];
779	if ((character >= 0x20 && character <= 0xD7FF) \|\| character == 0x09 \|\| character == 0x0A \|\| character == 0x0D \|\| (character >= 0xE000 && character <= 0xFFFD) \|\| (character >= 0x10000 && character <= 0x10FFFF))
780	{
781	safe_characters[j] = character;
782	j++;
783	}
784	}
785
786	return new String(safe_characters, 0, j);
787	}
788
789	static public void setElementTextValue(Element element, String text)
790	{
791	// Remove all text node children
792	NodeList children_nodelist = element.getChildNodes();
793	for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
794	{
795	Node child_node = children_nodelist.item(i);
796	if (child_node.getNodeType() == Node.TEXT_NODE)
797	{
798	element.removeChild(child_node);
799	}
800	}
801
802	// Add a new text node
803	if (text != null)
804	{
805	element.appendChild(element.getOwnerDocument().createTextNode(text));
806	}
807	}
808
809	/**
810	* Set the #text node value of some element.
811	*
812	* @param element
813	* the Element whose value we wish to set
814	* @param value
815	* the new value for the element as a String Soon to be
816	* deprecated!
817	*/
818	static final public void setValue(Element element, String value)
819	{
820	// Remove any existing child node(s)
821	clear(element);
822	// Add new text node.
823	if (value != null)
824	{
825	element.appendChild(element.getOwnerDocument().createTextNode(value));
826	}
827	}
828
829	static public void indentXML(Element elem, int depth)
830	{
831	Document doc = elem.getOwnerDocument();
832
833	String startIndentString = "\n";
834	for (int i = 0; i < depth; i++)
835	{
836	startIndentString += "\t";
837	}
838	Node startTextNode = doc.createTextNode(startIndentString);
839
840	String endIndentString = "\n";
841	for (int i = 0; i < depth - 1; i++)
842	{
843	endIndentString += "\t";
844	}
845	Node endTextNode = doc.createTextNode(endIndentString);
846
847	boolean found = false;
848	Node child = elem.getFirstChild();
849	while (child != null)
850	{
851	// first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
852	if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
853	{
854	Node spaceTextNode = child;
855	child = child.getNextSibling();
856	elem.removeChild(spaceTextNode);
857
858	if(child == null) break;
859	}
860
861	// now process normal element nodes as intended
862	if (child.getNodeType() == Node.ELEMENT_NODE)
863	{
864	found = true;
865	break;
866	}
867	child = child.getNextSibling();
868	}
869
870	if (found)
871	{
872	elem.appendChild(endTextNode);
873	}
874
875	child = elem.getFirstChild();
876	while (child != null)
877	{
878	// Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
879	// because the first while loop above would break out when it found an element node and wouldn't have got rid
880	// of all the empty text nodes yet.
881	// This time, beware not to delete the special end and start empty textnodes just added, since
882	// they've been created and inserted specifically.
883	if(child != endTextNode && child != startTextNode
884	&& child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
885	{
886	Node spaceTextNode = child;
887	child = child.getNextSibling();
888	elem.removeChild(spaceTextNode);
889
890	if(child == null) break;
891	}
892
893	// go back to processing normal element nodes as intended
894	if (child.getNodeType() == Node.ELEMENT_NODE)
895	{
896	elem.insertBefore(startTextNode.cloneNode(false), child);
897	indentXML((Element) child, depth + 1);
898	}
899	child = child.getNextSibling();
900	}
901	}
902
903	/**
904	* Write an XML document to a given file with the text node of the specified
905	* element unescaped
906	*/
907	static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
908	{
909	indentXML(document.getDocumentElement(), 1);
910	try
911	{
912	OutputStream os = new FileOutputStream(xml_file);
913	// Create an output format for our document.
914	OutputFormat f = new OutputFormat(document);
915	f.setEncoding("UTF-8");
916	f.setIndenting(true);
917	f.setLineWidth(0); // Why isn't this working!
918	f.setPreserveSpace(true);
919	if (nonEscapingTagNames != null)
920	{
921	f.setNonEscapingElements(nonEscapingTagNames);
922	}
923	// Create the necessary writer stream for serialization.
924	OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
925	Writer w = new BufferedWriter(osw);
926	// Generate a new serializer from the above.
927	XMLSerializer s = new XMLSerializer(w, f);
928	s.asDOMSerializer();
929	// Finally serialize the document to file.
930	s.serialize(document);
931	// And close.
932	os.close();
933	}
934	catch (Exception exception)
935	{
936	DebugStream.printStackTrace(exception);
937	}
938	}
939
940	/** Write an XML document to a given file */
941	static public void writeXMLFile(File xml_file, Document document)
942	{
943	writeXMLFile(xml_file, document, null);
944	}
945
946	public static void printXMLNode(Node e)
947	{
948	printXMLNode(e, 0);
949	}
950
951	public static void printXMLNode(Node e, int depth)
952	{ //recursive method call using DOM API...
953
954	for (int i = 0; i < depth; i++)
955	System.out.print(' ');
956
957	if (e.getNodeType() == Node.TEXT_NODE)
958	{
959	//System.out.println("text") ;
960	if (e.getNodeValue() != "")
961	{
962	System.out.println(e.getNodeValue());
963	}
964	return;
965	}
966
967	System.out.print('<');
968	System.out.print(e.getNodeName());
969	NamedNodeMap attrs = e.getAttributes();
970	if (attrs != null)
971	{
972	for (int i = 0; i < attrs.getLength(); i++)
973	{
974	Node attr = attrs.item(i);
975	System.out.print(' ');
976	System.out.print(attr.getNodeName());
977	System.out.print("=\"");
978	System.out.print(attr.getNodeValue());
979	System.out.print('"');
980	}
981	}
982	NodeList children = e.getChildNodes();
983
984	if (children == null \|\| children.getLength() == 0)
985	System.out.println("/>");
986	else
987	{
988
989	System.out.println('>');
990
991	int len = children.getLength();
992	for (int i = 0; i < len; i++)
993	{
994	printXMLNode(children.item(i), depth + 1);
995	}
996
997	for (int i = 0; i < depth; i++)
998	System.out.print(' ');
999
1000	System.out.println("</" + e.getNodeName() + ">");
1001	}
1002
1003	}
1004
1005	public static String xmlNodeToString(Node e)
1006	{
1007	StringBuffer sb = new StringBuffer("");
1008	xmlNodeToString(sb, e, true, "\t", 2);
1009	return sb.toString();
1010	}
1011
1012	public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
1013	{
1014
1015	if (e.getNodeType() == Node.CDATA_SECTION_NODE)
1016	{
1017	if (e.getNodeValue() != "")
1018	{
1019	String text = e.getNodeValue();
1020	sb.append("<![CDATA[");
1021	sb.append(text);
1022	sb.append("]]>");
1023	}
1024	return;
1025	}
1026
1027	if (e.getNodeType() == Node.TEXT_NODE)
1028	{
1029	if (e.getNodeValue() != "")
1030	{
1031	String text = e.getNodeValue();
1032	text = text.replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">").replaceAll("[\\n\\r\\t\\s]*$", "");
1033	for (Character c : text.toCharArray())
1034	{
1035	if (c.equals('\n'))
1036	{
1037	text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
1038	break;
1039	}
1040
1041	if (!Character.isWhitespace(c))
1042	{
1043	break;
1044	}
1045	}
1046	sb.append(text);
1047	}
1048	return;
1049	}
1050
1051	if (e.getNodeType() == Node.COMMENT_NODE)
1052	{
1053	if (e.getNodeValue() != "")
1054	{
1055	sb.append("<!--\n" + e.getNodeValue().trim() + "\n-->\n");
1056	}
1057	return;
1058	}
1059
1060	if (indent)
1061	{
1062	for (int i = 0; i < depth; i++)
1063	{
1064	sb.append(indentString);
1065	}
1066	}
1067
1068	sb.append('<');
1069	sb.append(e.getNodeName());
1070	NamedNodeMap attrs = e.getAttributes();
1071	if (attrs != null)
1072	{
1073	for (int i = 0; i < attrs.getLength(); i++)
1074	{
1075	Node attr = attrs.item(i);
1076	sb.append(' ');
1077	sb.append(attr.getNodeName());
1078	sb.append("=\"");
1079	sb.append(attr.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">"));
1080	sb.append('"');
1081	}
1082	}
1083	NodeList children = e.getChildNodes();
1084
1085	boolean hasElements = false;
1086	boolean indentSwapped = false;
1087	for (int i = 0; i < children.getLength(); i++)
1088	{
1089	if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1090	{
1091	hasElements = true;
1092	}
1093	if ((children.item(i).getNodeType() == Node.TEXT_NODE \|\| children.item(i).getNodeType() == Node.CDATA_SECTION_NODE) && indent)
1094	{
1095	if (children.item(i).getNodeValue().trim().length() > 0)
1096	{
1097	indentSwapped = true;
1098	indent = false;
1099	}
1100	}
1101	}
1102
1103	if (children == null \|\| children.getLength() == 0)
1104	{
1105	sb.append("/>");
1106
1107	if (indent)
1108	{
1109	sb.append("\n");
1110	}
1111	}
1112	else
1113	{
1114	sb.append(">");
1115	if (hasElements && indent)
1116	{
1117	sb.append("\n");
1118	}
1119
1120	int len = children.getLength();
1121	for (int i = 0; i < len; i++)
1122	{
1123	xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1124	}
1125
1126	if (indent)
1127	{
1128	for (int i = 0; i < depth; i++)
1129	{
1130	sb.append(indentString);
1131	}
1132	}
1133
1134	sb.append("</" + e.getNodeName() + ">");
1135
1136	if ((hasElements && indent) \|\| indentSwapped)
1137	{
1138	sb.append("\n");
1139	}
1140	}
1141	}
1142
1143	public static String xmlNodeToStringWithoutIndenting(Node e)
1144	{
1145	StringBuffer sb = new StringBuffer("");
1146	xmlNodeToStringWithoutNewline(sb, e, -1);
1147	return sb.toString();
1148	}
1149
1150	public static String xmlNodeToStringWithoutNewline(Node e)
1151	{
1152	StringBuffer sb = new StringBuffer("");
1153	xmlNodeToStringWithoutNewline(sb, e, 0);
1154	return sb.toString();
1155	}
1156
1157	private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1158	{
1159
1160	for (int i = 0; i < depth; i++)
1161	{
1162	sb.append(' ');
1163	}
1164
1165	if (e.getNodeType() == Node.TEXT_NODE)
1166	{
1167	if (e.getNodeValue() != "")
1168	{
1169	sb.append(e.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replace(">", ">"));
1170	}
1171	return;
1172	}
1173
1174	if (e.getNodeType() == Node.COMMENT_NODE)
1175	{
1176	if (e.getNodeValue() != "")
1177	{
1178	sb.append("<!--" + e.getNodeValue() + "-->");
1179	}
1180	return;
1181	}
1182
1183	sb.append('<');
1184	sb.append(e.getNodeName());
1185	NamedNodeMap attrs = e.getAttributes();
1186	if (attrs != null)
1187	{
1188	for (int i = 0; i < attrs.getLength(); i++)
1189	{
1190	Node attr = attrs.item(i);
1191	sb.append(' ');
1192	sb.append(attr.getNodeName());
1193	sb.append("=\"");
1194	sb.append(attr.getNodeValue());
1195	sb.append('"');
1196	}
1197	}
1198	NodeList children = e.getChildNodes();
1199
1200	if (children == null \|\| children.getLength() == 0)
1201	sb.append("/>");
1202	else
1203	{
1204
1205	sb.append(">");
1206
1207	int len = children.getLength();
1208	for (int i = 0; i < len; i++)
1209	{
1210	if (depth >= 0)
1211	{
1212	xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1213	}
1214	else
1215	{
1216	xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1217	}
1218	}
1219
1220	for (int i = 0; i < depth; i++)
1221	sb.append(' ');
1222
1223	sb.append("</" + e.getNodeName() + ">");
1224	}
1225	}
1226	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: