Context Navigation

source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 30705

Last change on this file since 30705 was 30705, checked in by ak19, 8 years ago
Merely tidying up.
Property svn:keywords set to `Author Date Id Revision`
File size: 33.4 KB

Line
1	package org.greenstone.gatherer.util;
2
3	import java.io.*;
4	import java.net.*;
5	import java.util.*;
6	import org.apache.xerces.parsers.*;
7	import org.apache.xml.serialize.*;
8	import org.greenstone.gatherer.DebugStream;
9	import org.w3c.dom.*;
10	import org.xml.sax.*;
11
12	import java.io.FileReader;
13	import java.io.IOException;
14	import java.io.StringReader;
15	import java.io.StringWriter; // for elementToString()
16
17	// SAX
18	import org.xml.sax.XMLReader;
19	import org.xml.sax.SAXException;
20	import org.xml.sax.SAXParseException;
21	import org.xml.sax.helpers.DefaultHandler;
22	import org.xml.sax.InputSource;
23
24	// JAXP
25	import javax.xml.parsers.DocumentBuilder;
26	import javax.xml.parsers.DocumentBuilderFactory;
27	import javax.xml.parsers.FactoryConfigurationError;
28	import javax.xml.parsers.ParserConfigurationException;
29	import javax.xml.parsers.SAXParser;
30	import javax.xml.parsers.SAXParserFactory;
31	// for elementToString():
32	import javax.xml.transform.OutputKeys;
33	import javax.xml.transform.Transformer;
34	import javax.xml.transform.TransformerFactory;
35	import javax.xml.transform.dom.DOMSource;
36	import javax.xml.transform.stream.StreamResult;
37
38
39	/** This class is a static class containing useful XML functions */
40	public class XMLTools
41	{
42	/** extracts the text out of a node */
43	public static Node getNodeTextNode(Element param)
44	{
45	param.normalize();
46	Node n = param.getFirstChild();
47	while (n != null && n.getNodeType() != Node.TEXT_NODE)
48	{
49	n = n.getNextSibling();
50	}
51	return n;
52	}
53
54	/** extracts the text out of a node */
55	public static String getNodeText(Element param)
56	{
57	Node text_node = getNodeTextNode(param);
58	if (text_node == null)
59	{
60	return "";
61	}
62	return text_node.getNodeValue();
63	}
64
65	public static void setNodeText(Element elem, String text)
66	{
67	Node old_text_node = getNodeTextNode(elem);
68	if (old_text_node != null)
69	{
70	elem.removeChild(old_text_node);
71	}
72	Text t = elem.getOwnerDocument().createTextNode(text);
73	elem.appendChild(t);
74	}
75
76	/** returns the (first) child element with the given name */
77	public static Node getChildByTagName(Node n, String name)
78	{
79
80	Node child = n.getFirstChild();
81	while (child != null)
82	{
83	if (child.getNodeName().equals(name))
84	{
85	return child;
86	}
87	child = child.getNextSibling();
88	}
89	return null; //not found
90	}
91
92	/**
93	* returns the (nth) child element with the given name index numbers start
94	* at 0
95	*/
96	public static Node getChildByTagNameIndexed(Node n, String name, int index)
97	{
98	if (index == -1)
99	{
100	return getChildByTagName(n, name);
101	}
102	int count = 0;
103	Node child = n.getFirstChild();
104	while (child != null)
105	{
106	if (child.getNodeName().equals(name))
107	{
108	if (count == index)
109	{
110	return child;
111	}
112	else
113	{
114	count++;
115	}
116	}
117	child = child.getNextSibling();
118	}
119	return null; //not found
120	}
121
122	/**
123	* returns the element parent/node_name[@attribute_name='attribute_value']
124	*/
125	public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
126	{
127
128	NodeList children = parent.getChildNodes();
129	for (int i = 0; i < children.getLength(); i++)
130	{
131	Node child = children.item(i);
132	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
133	if (child.getNodeName().equals(node_name))
134	{
135	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
136	return (Element) child;
137	}
138	}
139	// not found
140	return null;
141	}
142
143	/**
144	* returns a list of elements
145	* parent/node_name[@attribute_name='attribute_value']
146	*/
147	public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
148	{
149	ArrayList elements = new ArrayList();
150	NodeList children = parent.getChildNodes();
151	for (int i = 0; i < children.getLength(); i++)
152	{
153	//System.out.println("getNamedElementList");
154	Node child = children.item(i);
155	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
156	if (child.getNodeName().equals(node_name))
157	{
158	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
159	elements.add((Element) child);
160	}
161	}
162	// not found
163	if (elements.size() == 0)
164	{
165	elements = null;
166	}
167	return elements;
168	}
169
170	public static void copyAllChildren(Element to, Element from)
171	{
172
173	Document to_doc = to.getOwnerDocument();
174	Node child = from.getFirstChild();
175	while (child != null)
176	{
177	to.appendChild(to_doc.importNode(child, true));
178	child = child.getNextSibling();
179	}
180	}
181
182	/** duplicates all elements in list elements and appends to toElement */
183	public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes) {
184	int num_elems = elements.getLength();
185	if (num_elems < 1)
186	{
187	return;
188	}
189	for (int i = 0; i < num_elems; i++)
190	{
191	Element to_element = XMLTools.duplicateElement(owner, (Element) elements.item(i), with_attributes);
192	toElement.appendChild(to_element);
193	}
194
195	}
196	/** Duplicates an element */
197	public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
198	{
199	return duplicateElementNS(owner, element, null, with_attributes);
200	}
201
202	/** Duplicates an element */
203	public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
204	{
205	Element duplicate;
206	if (namespace_uri == null)
207	{
208	duplicate = owner.createElement(element.getTagName());
209	}
210	else
211	{
212	duplicate = owner.createElementNS(namespace_uri, element.getTagName());
213	}
214	// Copy element attributes
215	if (with_attributes)
216	{
217	NamedNodeMap attributes = element.getAttributes();
218	for (int i = 0; i < attributes.getLength(); i++)
219	{
220	Node attribute = attributes.item(i);
221	duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
222	}
223	}
224
225	// Copy element children
226	NodeList children = element.getChildNodes();
227	for (int i = 0; i < children.getLength(); i++)
228	{
229	Node child = children.item(i);
230	duplicate.appendChild(owner.importNode(child, true));
231	}
232
233	return duplicate;
234	}
235
236	/** Remove all of the child nodes from a certain node. */
237	static final public void clear(Node node)
238	{
239	while (node.hasChildNodes())
240	{
241	node.removeChild(node.getFirstChild());
242	}
243	}
244
245	static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
246	{
247	ArrayList child_elements = new ArrayList();
248
249	NodeList children_nodelist = parent_element.getChildNodes();
250	for (int i = 0; i < children_nodelist.getLength(); i++)
251	{
252	Node child_node = children_nodelist.item(i);
253	if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
254	{
255	child_elements.add(child_node);
256	}
257	}
258
259	return child_elements;
260	}
261
262	static public String getElementTextValue(Element element)
263	{
264	// Find the first text node child
265	NodeList children_nodelist = element.getChildNodes();
266	for (int i = 0; i < children_nodelist.getLength(); i++)
267	{
268	Node child_node = children_nodelist.item(i);
269	if (child_node.getNodeType() == Node.TEXT_NODE)
270	{
271	return child_node.getNodeValue();
272	}
273	}
274
275	// None found
276	return "";
277	}
278
279	/**
280	* Method to retrieve the value of a given node.
281	*
282	* @param element
283	* The <strong>Element</strong> whose value we wish to find. Soon
284	* to be deprecated!
285	*/
286	static final public String getValue(Node element)
287	{
288	if (element == null)
289	{
290	return "";
291	}
292	// If we've been given a subject node first retrieve its value node.
293	if (element.getNodeName().equals("Subject"))
294	{
295	element = getNodeFromNamed(element, "Value");
296	}
297	// If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
298	if (element != null && element.hasChildNodes())
299	{
300	StringBuffer text_buffer = new StringBuffer();
301	NodeList text_nodes = element.getChildNodes();
302	for (int i = 0; i < text_nodes.getLength(); i++)
303	{
304	Node possible_text = text_nodes.item(i);
305	if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
306	{
307	text_buffer.append(possible_text.getNodeValue());
308	}
309	}
310	return text_buffer.toString();
311	}
312	return "";
313	}
314
315	/**
316	* Method to retrieve from the node given, a certain child node with the
317	* specified name.
318	*
319	* @param parent
320	* The <strong>Node</strong> whose children should be searched.
321	* @param name
322	* The required nodes name as a <strong>String</strong>.
323	* @return The requested <strong>Node</strong> if it is found, <i>null</i>
324	* otherwise. Soon to be deprecated!
325	*/
326	static final public Node getNodeFromNamed(Node parent, String name)
327	{
328	Node child = null;
329	for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
330	{
331	if (i.getNodeName().equals(name))
332	{
333	child = i;
334	}
335	}
336	return child;
337	}
338
339	static final public String WELLFORMED = "well-formed !";
340	static final public String NOTWELLFORMED = "not well-formed";
341	static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
342	static final private String FOOTER = "</collectionConfig>";
343
344
345	public static Document getDOM(String xml_str)
346	{
347	Document doc = null;
348	try {
349
350	DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
351	InputSource is = new InputSource();
352	is.setCharacterStream(new StringReader(xml_str));
353	doc = db.parse(is);
354
355	} catch (Exception e) {
356	e.printStackTrace();
357	}
358	return doc;
359	}
360
361	public static String parse(String xml_str)
362	{
363	String validation_msg = WELLFORMED;
364	xml_str = HEADER + xml_str + FOOTER;
365	try
366	{
367	SAXParserFactory factory = SAXParserFactory.newInstance();
368	factory.setNamespaceAware(true);
369	//factory.setValidating (true);
370	SAXParser parser = factory.newSAXParser();
371	InputSource iSource = new InputSource(new StringReader(xml_str));
372	// parser.parse (iSource, new DefaultHandler ());
373
374	org.xml.sax.XMLReader reader = parser.getXMLReader();
375	reader.setContentHandler(new DefaultHandler());
376	reader.setErrorHandler(new DefaultHandler());
377	reader.parse(iSource);
378	}
379	catch (FactoryConfigurationError e)
380	{
381	validation_msg = "unable to get a document builder factory";
382	}
383	catch (ParserConfigurationException e)
384	{
385	validation_msg = "unable to configure parser";
386	}
387	catch (SAXParseException e)
388	{
389	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
390	}
391	catch (SAXException e)
392	{
393	validation_msg += " Fatal error: " + e.toString();
394	}
395	catch (IOException e)
396	{
397	validation_msg = "Unable to read the input, i/o error";
398	}
399
400	return validation_msg;
401	}
402
403	//In this method, the parsed string xml_str is not wrapped by the header and footer strings.
404	public static String parseDOM(String xml_str)
405	{
406	String validation_msg = WELLFORMED;
407
408	try
409	{
410	SAXParserFactory factory = SAXParserFactory.newInstance();
411	factory.setNamespaceAware(true);
412	//factory.setValidating (true);
413	SAXParser parser = factory.newSAXParser();
414	InputSource iSource = new InputSource(new StringReader(xml_str));
415	// parser.parse (iSource, new DefaultHandler ());
416
417	org.xml.sax.XMLReader reader = parser.getXMLReader();
418	reader.setContentHandler(new DefaultHandler());
419	reader.setErrorHandler(new DefaultHandler());
420	reader.parse(iSource);
421	}
422	catch (FactoryConfigurationError e)
423	{
424	validation_msg = "unable to get a document builder factory";
425	}
426	catch (ParserConfigurationException e)
427	{
428	validation_msg = "unable to configure parser";
429	}
430	catch (SAXParseException e)
431	{
432	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
433	}
434	catch (SAXException e)
435	{
436	validation_msg += " " + e.toString();
437	}
438	catch (IOException e)
439	{
440	validation_msg = "Unable to read the input, i/o error";
441	}
442
443	return validation_msg;
444	}
445
446	public static String parse(File xml_file)
447	{
448	String validation_msg = WELLFORMED;
449
450	try
451	{
452	SAXParserFactory factory = SAXParserFactory.newInstance();
453	factory.setNamespaceAware(true);
454	//factory.setValidating (true);
455	SAXParser parser = factory.newSAXParser();
456	FileReader r = new FileReader(xml_file);
457	InputSource iSource = new InputSource(r);
458	XMLReader reader = parser.getXMLReader();
459	reader.setContentHandler(new DefaultHandler());
460	reader.setErrorHandler(new DefaultHandler());
461	reader.parse(iSource);
462	}
463	catch (FactoryConfigurationError e)
464	{
465	validation_msg = "unable to get a document builder factory";
466	}
467	catch (ParserConfigurationException e)
468	{
469	validation_msg = "unable to configure parser";
470	}
471	catch (SAXParseException e)
472	{
473	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
474	}
475	catch (SAXException e)
476	{
477	validation_msg += " Fatal error: " + e.toString();
478	}
479	catch (IOException e)
480	{
481	validation_msg = "Unable to read the input, i/o error";
482	}
483
484	return validation_msg;
485	}
486
487	/** Returns a string of the location. */
488	private static String getLocationString(SAXParseException ex)
489	{
490	StringBuffer str = new StringBuffer();
491
492	String systemId = ex.getSystemId();
493	if (systemId != null)
494	{
495	int index = systemId.lastIndexOf('/');
496	if (index != -1)
497	systemId = systemId.substring(index + 1);
498	str.append(systemId);
499	}
500	str.append("(line ");
501	str.append(ex.getLineNumber() - 1);
502	str.append(", column ");
503	str.append(ex.getColumnNumber());
504	str.append("): ");
505
506	return str.toString();
507
508	} // getLocationString(SAXParseException):String
509
510	/** Parse an XML document from a given file path */
511	static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
512	{
513	if (use_class_loader == true)
514	{
515	InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
516	if (is != null)
517	{
518	return parseXML(is);
519	}
520	}
521
522	// Try the file outside the classes directory
523	return parseXMLFile(new File(xml_file_path));
524	}
525
526	/** Parse an XML document from a given file */
527	static public Document parseXMLFile(File xml_file)
528	{
529	// No file? No point trying!
530	if (xml_file.exists() == false)
531	{
532	System.err.println("@@@ file " + xml_file + " does not exist.");
533	return null;
534	}
535
536	try
537	{
538	return parseXML(new FileInputStream(xml_file));
539	}
540	catch (Exception exception)
541	{
542	DebugStream.printStackTrace(exception);
543	return null;
544	}
545	}
546
547	/** Parse an XML document from a given input stream */
548	static public Document parseXML(InputStream xml_input_stream)
549	{
550	Document document = null;
551
552	try
553	{
554	InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
555	document = parseXML(isr);
556	isr.close();
557	xml_input_stream.close();
558	}
559	catch (Exception exception)
560	{
561	DebugStream.printStackTrace(exception);
562	}
563
564	return document;
565	}
566
567	/** Parse an XML document from a given reader */
568	static public Document parseXML(Reader xml_reader)
569	{
570	Document document = null;
571
572	// If debugging, the following will store the XML contents to be parsed,
573	// which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
574	String xmlContents = "";
575
576	try
577	{
578	Reader reader = null;
579
580	// (1) By default, GLI will remove any contents preceeding (and invalidating)
581	// the XML and present these lines separately to the user
582	if (!DebugStream.isDebuggingEnabled())
583	{
584	try
585	{
586	reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
587	}
588	catch (Exception e)
589	{
590	System.err.println("Exception while wrapping the reader in parseXML(Reader)");
591	e.printStackTrace();
592	}
593	}
594
595	// (2) If we are running GLI in debug mode:
596	// In case parsing exceptions are thrown (SAX Exceptions), we want to get some
597	// idea of where things went wrong. This will print the "XML" contents to either
598	// system.out (if debugging is off) or to the DebugStream otherwise.
599	// We need to read the XML twice to know the line where things went wrong, so
600	// do the additional reading only if we're debugging
601	else
602	{
603	StringBuffer buf = new StringBuffer();
604	char[] buffer = new char[500];
605	int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
606	while (numCharsRead != -1)
607	{
608	buf.append(buffer, 0, numCharsRead);
609	numCharsRead = xml_reader.read(buffer, 0, buffer.length);
610	}
611	xmlContents = buf.toString();
612	xml_reader.close(); // closing the old Reader
613	xml_reader = null;
614	buffer = null;
615	buf = null;
616	// we need a Reader to parse the same contents as the Reader that was just closed
617	reader = new BufferedReader(new StringReader(xmlContents));
618	//System.err.println("xmlContents:\n" + xmlContents);
619	}
620
621	// (2) The actual XML parsing
622	InputSource isc = new InputSource(reader);
623	DOMParser parser = new DOMParser();
624	parser.setFeature("http://xml.org/sax/features/validation", false);
625	parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
626	// May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
627	parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
628	parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
629	parser.setEntityResolver(new GLIEntityResolver());
630	parser.parse(isc);
631	document = parser.getDocument();
632
633	}
634	catch (SAXParseException e)
635	{
636	showXMLParseFailureLine(e, xmlContents);
637	}
638	catch (SAXException exception)
639	{
640	System.err.println("SAX exception: " + exception.getMessage());
641	if (DebugStream.isDebuggingEnabled())
642	{
643	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
644	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
645	DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
646	System.exit(-1);
647	}
648	// else, not running in debug mode, so don't exit after exception
649	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
650	DebugStream.printStackTrace(exception);
651	}
652	catch (Exception exception)
653	{
654	DebugStream.printStackTrace(exception);
655	}
656
657	return document;
658	}
659
660	/**
661	* Displays the line (string) where the SAXParseException occurred, given a
662	* String of the entire xml that was being parsed and the SAXParseException
663	* object that was caught. The messages are printed to DebugStream, so run
664	* GLI/FLI with -debug to view this output.
665	*
666	* @param xmlContents
667	* is the entire xml that was being parsed when the exception
668	* occurred
669	* @param e
670	* is the SAXParseException object that was thrown upon parsing
671	* the xmlContents.
672	*/
673	public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
674	{
675
676	// There should be no characters at all that preceed the <?xml>... bit.
677	// The first check is for starting spaces:
678	if (xmlContents.startsWith("\n") \|\| xmlContents.startsWith(" ") \|\| xmlContents.startsWith("\t"))
679	{
680	DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
681	DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
682	return; // nothing more to do, first error identified
683	}
684
685	// the actual line (String literal) where parsing failed and the SAXParseException occurred.
686	String line = "";
687	int linenumber = e.getLineNumber();
688	DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
689	if (DebugStream.isDebuggingEnabled())
690	{
691	if (linenumber != -1)
692	{
693	String[] lines = xmlContents.split("\n");
694	if (lines.length > 0)
695	{
696	DebugStream.println(" (number of lines: " + lines.length + ")");
697	if (lines.length >= linenumber)
698	{
699	line = lines[linenumber - 1];
700	}
701	else
702	{ // error is past the last line
703	line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
704	}
705	}
706	else
707	{
708	DebugStream.print("\n");
709	}
710	lines = null;
711
712	DebugStream.println("The parsing error occurred on this line:\n*********START\n" + line + "\n*********END");
713	DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
714
715	// Uncomment if you want to print out the entire contents of the XML doc:
716	//DebugStream.println("\n\nThis was the XML:\n*********START\n"
717	// + xmlContents + "\n************END\n");
718	}
719	else
720	{ // no particular line number, print out all the xml so debugger can inspect it
721	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
722	}
723	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
724	DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
725	System.exit(-1);
726	}
727	else
728	{ // not running in debug mode
729	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
730	}
731	}
732
733	static public StringBuffer readXMLStream(InputStream input_stream)
734	{
735	StringBuffer xml = new StringBuffer("");
736
737	try
738	{
739	InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
740	BufferedReader buffered_in = new BufferedReader(isr);
741
742	String line = "";
743	boolean xml_content = false;
744	while ((line = buffered_in.readLine()) != null)
745	{
746	if (xml_content)
747	{
748	xml.append(line);
749	xml.append("\n");
750	}
751	else if (line.trim().startsWith("<?xml"))
752	{
753	xml_content = true;
754	xml.append(line);
755	xml.append("\n");
756	}
757	else
758	{
759	System.err.println(line);
760	}
761	}
762	buffered_in = null;
763	}
764	catch (Exception error)
765	{
766	System.err.println("Failed when trying to parse XML stream");
767	error.printStackTrace();
768	}
769
770	return xml;
771	}
772
773	/**
774	* Removes characters that are invalid in XML (see
775	* http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
776	*/
777	static public String removeInvalidCharacters(String text)
778	{
779	char[] safe_characters = new char[text.length()];
780	int j = 0;
781
782	char[] raw_characters = new char[text.length()];
783	text.getChars(0, text.length(), raw_characters, 0);
784	for (int i = 0; i < raw_characters.length; i++)
785	{
786	char character = raw_characters[i];
787	if ((character >= 0x20 && character <= 0xD7FF) \|\| character == 0x09 \|\| character == 0x0A \|\| character == 0x0D \|\| (character >= 0xE000 && character <= 0xFFFD) \|\| (character >= 0x10000 && character <= 0x10FFFF))
788	{
789	safe_characters[j] = character;
790	j++;
791	}
792	}
793
794	return new String(safe_characters, 0, j);
795	}
796
797	static public void setElementTextValue(Element element, String text)
798	{
799	// Remove all text node children
800	NodeList children_nodelist = element.getChildNodes();
801	for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
802	{
803	Node child_node = children_nodelist.item(i);
804	if (child_node.getNodeType() == Node.TEXT_NODE)
805	{
806	element.removeChild(child_node);
807	}
808	}
809
810	// Add a new text node
811	if (text != null)
812	{
813	element.appendChild(element.getOwnerDocument().createTextNode(text));
814	}
815	}
816
817	/**
818	* Set the #text node value of some element.
819	*
820	* @param element
821	* the Element whose value we wish to set
822	* @param value
823	* the new value for the element as a String Soon to be
824	* deprecated!
825	*/
826	static final public void setValue(Element element, String value)
827	{
828	// Remove any existing child node(s)
829	clear(element);
830	// Add new text node.
831	if (value != null)
832	{
833	element.appendChild(element.getOwnerDocument().createTextNode(value));
834	}
835	}
836
837	static public void indentXML(Element elem, int depth)
838	{
839	Document doc = elem.getOwnerDocument();
840
841	String startIndentString = "\n";
842	for (int i = 0; i < depth; i++)
843	{
844	startIndentString += "\t";
845	}
846	Node startTextNode = doc.createTextNode(startIndentString);
847
848	String endIndentString = "\n";
849	for (int i = 0; i < depth - 1; i++)
850	{
851	endIndentString += "\t";
852	}
853	Node endTextNode = doc.createTextNode(endIndentString);
854
855	boolean found = false;
856	Node child = elem.getFirstChild();
857	while (child != null)
858	{
859	// first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
860	if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
861	{
862	Node spaceTextNode = child;
863	child = child.getNextSibling();
864	elem.removeChild(spaceTextNode);
865
866	if(child == null) break;
867	}
868
869	// now process normal element nodes as intended
870	if (child.getNodeType() == Node.ELEMENT_NODE)
871	{
872	found = true;
873	break;
874	}
875	child = child.getNextSibling();
876	}
877
878	if (found)
879	{
880	elem.appendChild(endTextNode);
881	}
882
883	child = elem.getFirstChild();
884	while (child != null)
885	{
886	// Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
887	// because the first while loop above would break out when it found an element node and wouldn't have got rid
888	// of all the empty text nodes yet.
889	// This time, beware not to delete the special end and start empty textnodes just added, since
890	// they've been created and inserted specifically.
891	if(child != endTextNode && child != startTextNode
892	&& child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
893	{
894	Node spaceTextNode = child;
895	child = child.getNextSibling();
896	elem.removeChild(spaceTextNode);
897
898	if(child == null) break;
899	}
900
901	// go back to processing normal element nodes as intended
902	if (child.getNodeType() == Node.ELEMENT_NODE)
903	{
904	elem.insertBefore(startTextNode.cloneNode(false), child);
905	indentXML((Element) child, depth + 1);
906	}
907	child = child.getNextSibling();
908	}
909	}
910
911	/**
912	* Write an XML document to a given file with the text node of the specified
913	* element unescaped
914	*/
915	static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
916	{
917	indentXML(document.getDocumentElement(), 1);
918	try
919	{
920	OutputStream os = new FileOutputStream(xml_file);
921	// Create an output format for our document.
922	OutputFormat f = new OutputFormat(document);
923	f.setEncoding("UTF-8");
924	f.setIndenting(true);
925	f.setLineWidth(0); // Why isn't this working!
926	f.setPreserveSpace(true);
927	if (nonEscapingTagNames != null)
928	{
929	f.setNonEscapingElements(nonEscapingTagNames);
930	}
931	// Create the necessary writer stream for serialization.
932	OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
933	Writer w = new BufferedWriter(osw);
934	// Generate a new serializer from the above.
935	XMLSerializer s = new XMLSerializer(w, f);
936	s.asDOMSerializer();
937	// Finally serialize the document to file.
938	s.serialize(document);
939	// And close.
940	os.close();
941	}
942	catch (Exception exception)
943	{
944	DebugStream.printStackTrace(exception);
945	}
946	}
947
948	/** Write an XML document to a given file */
949	static public void writeXMLFile(File xml_file, Document document)
950	{
951	writeXMLFile(xml_file, document, null);
952	}
953
954	public static void printXMLNode(Node e)
955	{
956	printXMLNode(e, 0);
957	}
958
959	public static void printXMLNode(Node e, int depth)
960	{ //recursive method call using DOM API...
961
962	for (int i = 0; i < depth; i++)
963	System.out.print(' ');
964
965	if (e.getNodeType() == Node.TEXT_NODE)
966	{
967	//System.out.println("text") ;
968	if (e.getNodeValue() != "")
969	{
970	System.out.println(e.getNodeValue());
971	}
972	return;
973	}
974
975	System.out.print('<');
976	System.out.print(e.getNodeName());
977	NamedNodeMap attrs = e.getAttributes();
978	if (attrs != null)
979	{
980	for (int i = 0; i < attrs.getLength(); i++)
981	{
982	Node attr = attrs.item(i);
983	System.out.print(' ');
984	System.out.print(attr.getNodeName());
985	System.out.print("=\"");
986	System.out.print(attr.getNodeValue());
987	System.out.print('"');
988	}
989	}
990	NodeList children = e.getChildNodes();
991
992	if (children == null \|\| children.getLength() == 0)
993	System.out.println("/>");
994	else
995	{
996
997	System.out.println('>');
998
999	int len = children.getLength();
1000	for (int i = 0; i < len; i++)
1001	{
1002	printXMLNode(children.item(i), depth + 1);
1003	}
1004
1005	for (int i = 0; i < depth; i++)
1006	System.out.print(' ');
1007
1008	System.out.println("</" + e.getNodeName() + ">");
1009	}
1010
1011	}
1012
1013	public static String xmlNodeToString(Node e)
1014	{
1015	StringBuffer sb = new StringBuffer("");
1016	xmlNodeToString(sb, e, true, "\t", 2);
1017	return sb.toString();
1018	}
1019
1020	public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
1021	{
1022
1023	if (e.getNodeType() == Node.CDATA_SECTION_NODE)
1024	{
1025	if (e.getNodeValue() != "")
1026	{
1027	String text = e.getNodeValue();
1028	sb.append("<![CDATA[");
1029	sb.append(text);
1030	sb.append("]]>");
1031	}
1032	return;
1033	}
1034
1035	if (e.getNodeType() == Node.TEXT_NODE)
1036	{
1037	if (e.getNodeValue() != "")
1038	{
1039	String text = e.getNodeValue();
1040	text = text.replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">").replaceAll("[\\n\\r\\t\\s]*$", "");
1041	for (Character c : text.toCharArray())
1042	{
1043	if (c.equals('\n'))
1044	{
1045	text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
1046	break;
1047	}
1048
1049	if (!Character.isWhitespace(c))
1050	{
1051	break;
1052	}
1053	}
1054	sb.append(text);
1055	}
1056	return;
1057	}
1058
1059	if (e.getNodeType() == Node.COMMENT_NODE)
1060	{
1061	if (e.getNodeValue() != "")
1062	{
1063	sb.append("<!--\n" + e.getNodeValue().trim() + "\n-->\n");
1064	}
1065	return;
1066	}
1067
1068	if (indent)
1069	{
1070	for (int i = 0; i < depth; i++)
1071	{
1072	sb.append(indentString);
1073	}
1074	}
1075
1076	sb.append('<');
1077	sb.append(e.getNodeName());
1078	NamedNodeMap attrs = e.getAttributes();
1079	if (attrs != null)
1080	{
1081	for (int i = 0; i < attrs.getLength(); i++)
1082	{
1083	Node attr = attrs.item(i);
1084	sb.append(' ');
1085	sb.append(attr.getNodeName());
1086	sb.append("=\"");
1087	sb.append(attr.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">"));
1088	sb.append('"');
1089	}
1090	}
1091	NodeList children = e.getChildNodes();
1092
1093	boolean hasElements = false;
1094	boolean indentSwapped = false;
1095	for (int i = 0; i < children.getLength(); i++)
1096	{
1097	if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1098	{
1099	hasElements = true;
1100	}
1101	if ((children.item(i).getNodeType() == Node.TEXT_NODE \|\| children.item(i).getNodeType() == Node.CDATA_SECTION_NODE) && indent)
1102	{
1103	if (children.item(i).getNodeValue().trim().length() > 0)
1104	{
1105	indentSwapped = true;
1106	indent = false;
1107	}
1108	}
1109	}
1110
1111	if (children == null \|\| children.getLength() == 0)
1112	{
1113	sb.append("/>");
1114
1115	if (indent)
1116	{
1117	sb.append("\n");
1118	}
1119	}
1120	else
1121	{
1122	sb.append(">");
1123	if (hasElements && indent)
1124	{
1125	sb.append("\n");
1126	}
1127
1128	int len = children.getLength();
1129	for (int i = 0; i < len; i++)
1130	{
1131	xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1132	}
1133
1134	if (indent)
1135	{
1136	for (int i = 0; i < depth; i++)
1137	{
1138	sb.append(indentString);
1139	}
1140	}
1141
1142	sb.append("</" + e.getNodeName() + ">");
1143
1144	if ((hasElements && indent) \|\| indentSwapped)
1145	{
1146	sb.append("\n");
1147	}
1148	}
1149	}
1150
1151	public static String xmlNodeToStringWithoutIndenting(Node e)
1152	{
1153	StringBuffer sb = new StringBuffer("");
1154	xmlNodeToStringWithoutNewline(sb, e, -1);
1155	return sb.toString();
1156	}
1157
1158	public static String xmlNodeToStringWithoutNewline(Node e)
1159	{
1160	StringBuffer sb = new StringBuffer("");
1161	xmlNodeToStringWithoutNewline(sb, e, 0);
1162	return sb.toString();
1163	}
1164
1165	private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1166	{
1167
1168	for (int i = 0; i < depth; i++)
1169	{
1170	sb.append(' ');
1171	}
1172
1173	if (e.getNodeType() == Node.TEXT_NODE)
1174	{
1175	if (e.getNodeValue() != "")
1176	{
1177	sb.append(e.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replace(">", ">"));
1178	}
1179	return;
1180	}
1181
1182	if (e.getNodeType() == Node.COMMENT_NODE)
1183	{
1184	if (e.getNodeValue() != "")
1185	{
1186	sb.append("<!--" + e.getNodeValue() + "-->");
1187	}
1188	return;
1189	}
1190
1191	sb.append('<');
1192	sb.append(e.getNodeName());
1193	NamedNodeMap attrs = e.getAttributes();
1194	if (attrs != null)
1195	{
1196	for (int i = 0; i < attrs.getLength(); i++)
1197	{
1198	Node attr = attrs.item(i);
1199	sb.append(' ');
1200	sb.append(attr.getNodeName());
1201	sb.append("=\"");
1202	sb.append(attr.getNodeValue());
1203	sb.append('"');
1204	}
1205	}
1206	NodeList children = e.getChildNodes();
1207
1208	if (children == null \|\| children.getLength() == 0)
1209	sb.append("/>");
1210	else
1211	{
1212
1213	sb.append(">");
1214
1215	int len = children.getLength();
1216	for (int i = 0; i < len; i++)
1217	{
1218	if (depth >= 0)
1219	{
1220	xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1221	}
1222	else
1223	{
1224	xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1225	}
1226	}
1227
1228	for (int i = 0; i < depth; i++)
1229	sb.append(' ');
1230
1231	sb.append("</" + e.getNodeName() + ">");
1232	}
1233	}
1234
1235
1236
1237	// This method will convert an Element to a String too, like xmlNodeToString() above.
1238	// But for a document root element (doc.getDocumentElement()), this method will additionally
1239	// return its processing instruction line at the start (<?xml ... ?>).
1240	// This method copied into GLI from src/java/org/greenstone/gsdl3/util/GSXML.java
1241	public static String elementToString(Element e, boolean indent)
1242	{
1243	String str = "";
1244	try
1245	{
1246	TransformerFactory tf = TransformerFactory.newInstance();
1247	Transformer trans = tf.newTransformer();
1248	StringWriter sw = new StringWriter();
1249	if (indent)
1250	{
1251	trans.setOutputProperty(OutputKeys.INDENT, "yes");
1252	}
1253	else
1254	{
1255	trans.setOutputProperty(OutputKeys.INDENT, "no");
1256	}
1257	trans.transform(new DOMSource(e), new StreamResult(sw));
1258	str = sw.toString();
1259	}
1260	catch (Exception ex)
1261	{
1262	str += "Exception: couldn't write " + e + " to log";
1263	}
1264	finally
1265	{
1266	return str;
1267	}
1268	}
1269	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: