Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

XMLTools.java@ 33053

Last change on this file since 33053 was 33053, checked in by ak19, 5 years ago
I still had some stuff of Nathan Kelly's (FileTransfer-WebSocketPair) sitting on my USB. Had already committed the Themes folder at the time, 2 years back. Not sure if he wanted this additional folder committed. But I didn't want to delete it and decided it will be better off on SVN. When we use his project, if we find we didn't need this test folder, we can remove it from svn then.
File size: 33.4 KB

Line
1	package org.greenstone.gatherer.util;
2
3	import java.io.*;
4	import java.net.*;
5	import java.util.*;
6	import org.apache.xerces.parsers.*;
7	import org.apache.xml.serialize.*;
8	import org.greenstone.gatherer.DebugStream;
9	import org.w3c.dom.*;
10	import org.xml.sax.*;
11
12	import java.io.FileReader;
13	import java.io.IOException;
14	import java.io.StringReader;
15	import java.io.StringWriter; // for elementToString()
16
17	// SAX
18	import org.xml.sax.XMLReader;
19	import org.xml.sax.SAXException;
20	import org.xml.sax.SAXParseException;
21	import org.xml.sax.helpers.DefaultHandler;
22	import org.xml.sax.InputSource;
23
24	// JAXP
25	import javax.xml.parsers.DocumentBuilder;
26	import javax.xml.parsers.DocumentBuilderFactory;
27	import javax.xml.parsers.FactoryConfigurationError;
28	import javax.xml.parsers.ParserConfigurationException;
29	import javax.xml.parsers.SAXParser;
30	import javax.xml.parsers.SAXParserFactory;
31	// for elementToString():
32	import javax.xml.transform.OutputKeys;
33	import javax.xml.transform.Transformer;
34	import javax.xml.transform.TransformerFactory;
35	import javax.xml.transform.dom.DOMSource;
36	import javax.xml.transform.stream.StreamResult;
37
38
39	/** This class is a static class containing useful XML functions */
40	public class XMLTools
41	{
42	/** extracts the text out of a node */
43	public static Node getNodeTextNode(Element param)
44	{
45	param.normalize();
46	Node n = param.getFirstChild();
47	while (n != null && n.getNodeType() != Node.TEXT_NODE)
48	{
49	n = n.getNextSibling();
50	}
51	return n;
52	}
53
54	/** extracts the text out of a node */
55	public static String getNodeText(Element param)
56	{
57	Node text_node = getNodeTextNode(param);
58	if (text_node == null)
59	{
60	return "";
61	}
62	return text_node.getNodeValue();
63	}
64
65	public static void setNodeText(Element elem, String text)
66	{
67	Node old_text_node = getNodeTextNode(elem);
68	if (old_text_node != null)
69	{
70	elem.removeChild(old_text_node);
71	}
72	Text t = elem.getOwnerDocument().createTextNode(text);
73	elem.appendChild(t);
74	}
75
76	/** returns the (first) child element with the given name */
77	public static Node getChildByTagName(Node n, String name)
78	{
79
80	Node child = n.getFirstChild();
81	while (child != null)
82	{
83	if (child.getNodeName().equals(name))
84	{
85	return child;
86	}
87	child = child.getNextSibling();
88	}
89	return null; //not found
90	}
91
92	/**
93	* returns the (nth) child element with the given name index numbers start
94	* at 0
95	*/
96	public static Node getChildByTagNameIndexed(Node n, String name, int index)
97	{
98	if (index == -1)
99	{
100	return getChildByTagName(n, name);
101	}
102	int count = 0;
103	Node child = n.getFirstChild();
104	while (child != null)
105	{
106	if (child.getNodeName().equals(name))
107	{
108	if (count == index)
109	{
110	return child;
111	}
112	else
113	{
114	count++;
115	}
116	}
117	child = child.getNextSibling();
118	}
119	return null; //not found
120	}
121
122	/**
123	* returns the element parent/node_name[@attribute_name='attribute_value']
124	*/
125	public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
126	{
127
128	NodeList children = parent.getChildNodes();
129	for (int i = 0; i < children.getLength(); i++)
130	{
131	Node child = children.item(i);
132	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
133	if (child.getNodeName().equals(node_name))
134	{
135	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
136	return (Element) child;
137	}
138	}
139	// not found
140	return null;
141	}
142
143	/**
144	* returns a list of elements
145	* parent/node_name[@attribute_name='attribute_value']
146	*/
147	public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
148	{
149	ArrayList elements = new ArrayList();
150	NodeList children = parent.getChildNodes();
151	for (int i = 0; i < children.getLength(); i++)
152	{
153	//System.out.println("getNamedElementList");
154	Node child = children.item(i);
155	//logger.debug("getnamed elem, node nmae="+child.getNodeName());
156	if (child.getNodeName().equals(node_name))
157	{
158	if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
159	elements.add((Element) child);
160	}
161	}
162	// not found
163	if (elements.size() == 0)
164	{
165	elements = null;
166	}
167	return elements;
168	}
169
170	public static void copyAllChildren(Element to, Element from)
171	{
172
173	Document to_doc = to.getOwnerDocument();
174	Node child = from.getFirstChild();
175	while (child != null)
176	{
177	to.appendChild(to_doc.importNode(child, true));
178	child = child.getNextSibling();
179	}
180	}
181
182	/** duplicates all elements in list elements and appends to toElement */
183	public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes) {
184	int num_elems = elements.getLength();
185	if (num_elems < 1)
186	{
187	return;
188	}
189	for (int i = 0; i < num_elems; i++)
190	{
191	Element to_element = XMLTools.duplicateElement(owner, (Element) elements.item(i), with_attributes);
192	toElement.appendChild(to_element);
193	}
194
195	}
196	/** Duplicates an element */
197	public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
198	{
199	return duplicateElementNS(owner, element, null, with_attributes);
200	}
201
202	/** Duplicates an element */
203	public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
204	{
205	Element duplicate;
206	if (namespace_uri == null)
207	{
208	duplicate = owner.createElement(element.getTagName());
209	}
210	else
211	{
212	duplicate = owner.createElementNS(namespace_uri, element.getTagName());
213	}
214	// Copy element attributes
215	if (with_attributes)
216	{
217	NamedNodeMap attributes = element.getAttributes();
218	for (int i = 0; i < attributes.getLength(); i++)
219	{
220	Node attribute = attributes.item(i);
221	duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
222	}
223	}
224
225	// Copy element children
226	NodeList children = element.getChildNodes();
227	for (int i = 0; i < children.getLength(); i++)
228	{
229	Node child = children.item(i);
230	duplicate.appendChild(owner.importNode(child, true));
231	}
232
233	return duplicate;
234	}
235
236	/** Remove all of the child nodes from a certain node. */
237	static final public void clear(Node node)
238	{
239	while (node.hasChildNodes())
240	{
241	node.removeChild(node.getFirstChild());
242	}
243	}
244
245	static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
246	{
247	ArrayList child_elements = new ArrayList();
248
249	NodeList children_nodelist = parent_element.getChildNodes();
250	for (int i = 0; i < children_nodelist.getLength(); i++)
251	{
252	Node child_node = children_nodelist.item(i);
253	if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
254	{
255	child_elements.add(child_node);
256	}
257	}
258
259	return child_elements;
260	}
261
262	static public String getElementTextValue(Element element)
263	{
264	// Find the first text node child
265	NodeList children_nodelist = element.getChildNodes();
266	for (int i = 0; i < children_nodelist.getLength(); i++)
267	{
268	Node child_node = children_nodelist.item(i);
269	if (child_node.getNodeType() == Node.TEXT_NODE)
270	{
271	return child_node.getNodeValue();
272	}
273	}
274
275	// None found
276	return "";
277	}
278
279	/**
280	* Method to retrieve the value of a given node.
281	*
282	* @param element
283	* The <strong>Element</strong> whose value we wish to find. Soon
284	* to be deprecated!
285	*/
286	static final public String getValue(Node element)
287	{
288	if (element == null)
289	{
290	return "";
291	}
292	// If we've been given a subject node first retrieve its value node.
293	if (element.getNodeName().equals("Subject"))
294	{
295	element = getNodeFromNamed(element, "Value");
296	}
297	// If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
298	if (element != null && element.hasChildNodes())
299	{
300	StringBuffer text_buffer = new StringBuffer();
301	NodeList text_nodes = element.getChildNodes();
302	for (int i = 0; i < text_nodes.getLength(); i++)
303	{
304	Node possible_text = text_nodes.item(i);
305	if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
306	{
307	text_buffer.append(possible_text.getNodeValue());
308	}
309	}
310	return text_buffer.toString();
311	}
312	return "";
313	}
314
315	/**
316	* Method to retrieve from the node given, a certain child node with the
317	* specified name.
318	*
319	* @param parent
320	* The <strong>Node</strong> whose children should be searched.
321	* @param name
322	* The required nodes name as a <strong>String</strong>.
323	* @return The requested <strong>Node</strong> if it is found, <i>null</i>
324	* otherwise. Soon to be deprecated!
325	*/
326	static final public Node getNodeFromNamed(Node parent, String name)
327	{
328	Node child = null;
329	for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
330	{
331	if (i.getNodeName().equals(name))
332	{
333	child = i;
334	}
335	}
336	return child;
337	}
338
339	static final public String WELLFORMED = "well-formed !";
340	static final public String NOTWELLFORMED = "not well-formed";
341	static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:gslib='http://www.greenstone.org/skinning' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
342	static final private String FOOTER = "</collectionConfig>";
343
344
345	public static Document getDOM(String xml_str)
346	{
347	Document doc = null;
348	try {
349
350	DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
351	InputSource is = new InputSource();
352	is.setCharacterStream(new StringReader(xml_str));
353	doc = db.parse(is);
354
355	} catch (Exception e) {
356	e.printStackTrace();
357	}
358	return doc;
359	}
360
361	public static String parse(String xml_str)
362	{
363	String validation_msg = WELLFORMED;
364	xml_str = HEADER + xml_str + FOOTER;
365	try
366	{
367	SAXParserFactory factory = SAXParserFactory.newInstance();
368	factory.setNamespaceAware(true);
369	//factory.setValidating (true);
370	SAXParser parser = factory.newSAXParser();
371	InputSource iSource = new InputSource(new StringReader(xml_str));
372	// parser.parse (iSource, new DefaultHandler ());
373
374	org.xml.sax.XMLReader reader = parser.getXMLReader();
375	reader.setContentHandler(new DefaultHandler());
376	reader.setErrorHandler(new DefaultHandler());
377	reader.parse(iSource);
378	}
379	catch (FactoryConfigurationError e)
380	{
381	validation_msg = "unable to get a document builder factory";
382	}
383	catch (ParserConfigurationException e)
384	{
385	validation_msg = "unable to configure parser";
386	}
387	catch (SAXParseException e)
388	{
389	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
390	}
391	catch (SAXException e)
392	{
393	validation_msg += " Fatal error: " + e.toString();
394	}
395	catch (IOException e)
396	{
397	validation_msg = "Unable to read the input, i/o error";
398	}
399
400	return validation_msg;
401	}
402
403	//In this method, the parsed string xml_str is not wrapped by the header and footer strings.
404	public static String parseDOM(String xml_str)
405	{
406	String validation_msg = WELLFORMED;
407
408	try
409	{
410	SAXParserFactory factory = SAXParserFactory.newInstance();
411	factory.setNamespaceAware(true);
412	//factory.setValidating (true);
413	SAXParser parser = factory.newSAXParser();
414	InputSource iSource = new InputSource(new StringReader(xml_str));
415	// parser.parse (iSource, new DefaultHandler ());
416
417	org.xml.sax.XMLReader reader = parser.getXMLReader();
418	reader.setContentHandler(new DefaultHandler());
419	reader.setErrorHandler(new DefaultHandler());
420	reader.parse(iSource);
421	}
422	catch (FactoryConfigurationError e)
423	{
424	validation_msg = "unable to get a document builder factory";
425	}
426	catch (ParserConfigurationException e)
427	{
428	validation_msg = "unable to configure parser";
429	}
430	catch (SAXParseException e)
431	{
432	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
433	}
434	catch (SAXException e)
435	{
436	validation_msg += " " + e.toString();
437	}
438	catch (IOException e)
439	{
440	validation_msg = "Unable to read the input, i/o error";
441	}
442
443	return validation_msg;
444	}
445
446	public static String parse(File xml_file)
447	{
448	String validation_msg = WELLFORMED;
449
450	try
451	{
452	SAXParserFactory factory = SAXParserFactory.newInstance();
453	factory.setNamespaceAware(true);
454	//factory.setValidating (true);
455	SAXParser parser = factory.newSAXParser();
456	FileReader r = new FileReader(xml_file);
457	InputSource iSource = new InputSource(r);
458	XMLReader reader = parser.getXMLReader();
459	reader.setContentHandler(new DefaultHandler());
460	reader.setErrorHandler(new DefaultHandler());
461	reader.parse(iSource);
462	}
463	catch (FactoryConfigurationError e)
464	{
465	validation_msg = "unable to get a document builder factory";
466	}
467	catch (ParserConfigurationException e)
468	{
469	validation_msg = "unable to configure parser";
470	}
471	catch (SAXParseException e)
472	{
473	validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
474	}
475	catch (SAXException e)
476	{
477	validation_msg += " Fatal error: " + e.toString();
478	}
479	catch (IOException e)
480	{
481	validation_msg = "Unable to read the input, i/o error";
482	}
483
484	return validation_msg;
485	}
486
487	/** Returns a string of the location. */
488	private static String getLocationString(SAXParseException ex)
489	{
490	StringBuffer str = new StringBuffer();
491
492	String systemId = ex.getSystemId();
493	if (systemId != null)
494	{
495	int index = systemId.lastIndexOf('/');
496	if (index != -1)
497	systemId = systemId.substring(index + 1);
498	str.append(systemId);
499	}
500	str.append("(line ");
501	str.append(ex.getLineNumber() - 1);
502	str.append(", column ");
503	str.append(ex.getColumnNumber());
504	str.append("): ");
505
506	return str.toString();
507
508	} // getLocationString(SAXParseException):String
509
510	/** Parse an XML document from a given file path */
511	static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
512	{
513	if (use_class_loader == true)
514	{
515	InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
516	if (is != null)
517	{
518	return parseXML(is);
519	}
520	}
521
522	// Try the file outside the classes directory
523	return parseXMLFile(new File(xml_file_path));
524	}
525
526	/** Parse an XML document from a given file */
527	static public Document parseXMLFile(File xml_file)
528	{
529	// No file? No point trying!
530	if (xml_file.exists() == false)
531	{
532	// System.err.println("@@@ file " + xml_file + " does not exist.");
533	return null;
534	}
535
536	try
537	{
538	return parseXML(new FileInputStream(xml_file));
539	}
540	catch (Exception exception)
541	{
542	DebugStream.printStackTrace(exception);
543	return null;
544	}
545	}
546
547	/** Parse an XML document from a given input stream */
548	static public Document parseXML(InputStream xml_input_stream)
549	{
550	Document document = null;
551
552	try
553	{
554	InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
555	document = parseXML(isr);
556	isr.close();
557	xml_input_stream.close();
558	}
559	catch (Exception exception)
560	{
561	DebugStream.printStackTrace(exception);
562	}
563
564	return document;
565	}
566
567	/** Parse an XML document from a given reader */
568	static public Document parseXML(Reader xml_reader)
569	{
570	Document document = null;
571
572	// If debugging, the following will store the XML contents to be parsed,
573	// which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
574	String xmlContents = "";
575
576	try
577	{
578	Reader reader = null;
579
580	// (1) By default, GLI will remove any contents preceeding (and invalidating)
581	// the XML and present these lines separately to the user
582	if (!DebugStream.isDebuggingEnabled())
583	{
584	try
585	{
586	reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
587	}
588	catch (Exception e)
589	{
590	System.err.println("Exception while wrapping the reader in parseXML(Reader)");
591	e.printStackTrace();
592	}
593	}
594
595	// (2) If we are running GLI in debug mode:
596	// In case parsing exceptions are thrown (SAX Exceptions), we want to get some
597	// idea of where things went wrong. This will print the "XML" contents to either
598	// system.out (if debugging is off) or to the DebugStream otherwise.
599	// We need to read the XML twice to know the line where things went wrong, so
600	// do the additional reading only if we're debugging
601	else
602	{
603	StringBuffer buf = new StringBuffer();
604	char[] buffer = new char[500];
605	int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
606	while (numCharsRead != -1)
607	{
608	buf.append(buffer, 0, numCharsRead);
609	numCharsRead = xml_reader.read(buffer, 0, buffer.length);
610	}
611	xmlContents = buf.toString();
612	xml_reader.close(); // closing the old Reader
613	xml_reader = null;
614	buffer = null;
615	buf = null;
616	// we need a Reader to parse the same contents as the Reader that was just closed
617	reader = new BufferedReader(new StringReader(xmlContents));
618	//System.err.println("xmlContents:\n" + xmlContents);
619	}
620
621	// (2) The actual XML parsing
622	InputSource isc = new InputSource(reader);
623	DOMParser parser = new DOMParser();
624	parser.setFeature("http://xml.org/sax/features/validation", false);
625	parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
626	// May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
627	parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
628	parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
629	parser.setEntityResolver(new GLIEntityResolver());
630	parser.parse(isc);
631	document = parser.getDocument();
632
633	}
634	catch (SAXParseException e)
635	{
636	showXMLParseFailureLine(e, xmlContents);
637	}
638	catch (SAXException exception)
639	{
640	System.err.println("SAX exception: " + exception.getMessage());
641	if (DebugStream.isDebuggingEnabled())
642	{
643	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
644	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
645	DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
646	System.exit(-1);
647	}
648	// else, not running in debug mode, so don't exit after exception
649	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
650	DebugStream.printStackTrace(exception);
651	}
652	catch (Exception exception)
653	{
654	DebugStream.printStackTrace(exception);
655	}
656
657	return document;
658	}
659
660	/**
661	* Displays the line (string) where the SAXParseException occurred, given a
662	* String of the entire xml that was being parsed and the SAXParseException
663	* object that was caught. The messages are printed to DebugStream, so run
664	* GLI/FLI with -debug to view this output.
665	*
666	* @param xmlContents
667	* is the entire xml that was being parsed when the exception
668	* occurred
669	* @param e
670	* is the SAXParseException object that was thrown upon parsing
671	* the xmlContents.
672	*/
673	public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
674	{
675
676	// There should be no characters at all that preceed the <?xml>... bit.
677	// The first check is for starting spaces:
678	if (xmlContents.startsWith("\n") \|\| xmlContents.startsWith(" ") \|\| xmlContents.startsWith("\t"))
679	{
680	DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
681	DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
682	return; // nothing more to do, first error identified
683	}
684
685	// the actual line (String literal) where parsing failed and the SAXParseException occurred.
686	String line = "";
687	int linenumber = e.getLineNumber();
688	DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
689	if (DebugStream.isDebuggingEnabled())
690	{
691	if (linenumber != -1)
692	{
693	String[] lines = xmlContents.split("\n");
694	if (lines.length > 0)
695	{
696	DebugStream.println(" (number of lines: " + lines.length + ")");
697	if (lines.length >= linenumber)
698	{
699	line = lines[linenumber - 1];
700	}
701	else
702	{ // error is past the last line
703	line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
704	}
705	}
706	else
707	{
708	DebugStream.print("\n");
709	}
710	lines = null;
711
712	DebugStream.println("The parsing error occurred on this line:\n*********START\n" + line + "\n*********END");
713	DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
714
715	// Uncomment if you want to print out the entire contents of the XML doc:
716	//DebugStream.println("\n\nThis was the XML:\n*********START\n"
717	// + xmlContents + "\n************END\n");
718	}
719	else
720	{ // no particular line number, print out all the xml so debugger can inspect it
721	DebugStream.println("Encountered a SAX exception when parsing the following:\n*******START\n" + xmlContents + "\n**********END\n");
722	}
723	// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
724	DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
725	System.exit(-1);
726	}
727	else
728	{ // not running in debug mode
729	System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
730	}
731	}
732
733	static public StringBuffer readXMLStream(InputStream input_stream)
734	{
735	StringBuffer xml = new StringBuffer("");
736
737	try
738	{
739	InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
740	BufferedReader buffered_in = new BufferedReader(isr);
741
742	String line = "";
743	boolean xml_content = false;
744	while ((line = buffered_in.readLine()) != null)
745	{
746	if (xml_content)
747	{
748	xml.append(line);
749	xml.append("\n");
750	}
751	else if (line.trim().startsWith("<?xml"))
752	{
753	xml_content = true;
754	xml.append(line);
755	xml.append("\n");
756	}
757	else
758	{
759	System.err.println(line);
760	}
761	}
762	buffered_in = null;
763	}
764	catch (Exception error)
765	{
766	System.err.println("Failed when trying to parse XML stream");
767	error.printStackTrace();
768	}
769
770	return xml;
771	}
772
773	/**
774	* Removes characters that are invalid in XML (see
775	* http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
776	*/
777	static public String removeInvalidCharacters(String text)
778	{
779	char[] safe_characters = new char[text.length()];
780	int j = 0;
781
782	char[] raw_characters = new char[text.length()];
783	text.getChars(0, text.length(), raw_characters, 0);
784	for (int i = 0; i < raw_characters.length; i++)
785	{
786	char character = raw_characters[i];
787	if ((character >= 0x20 && character <= 0xD7FF) \|\| character == 0x09 \|\| character == 0x0A \|\| character == 0x0D \|\| (character >= 0xE000 && character <= 0xFFFD) \|\| (character >= 0x10000 && character <= 0x10FFFF))
788	{
789	safe_characters[j] = character;
790	j++;
791	}
792	}
793
794	return new String(safe_characters, 0, j);
795	}
796
797	static public void setElementTextValue(Element element, String text)
798	{
799	// Remove all text node children
800	NodeList children_nodelist = element.getChildNodes();
801	for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
802	{
803	Node child_node = children_nodelist.item(i);
804	if (child_node.getNodeType() == Node.TEXT_NODE)
805	{
806	element.removeChild(child_node);
807	}
808	}
809
810	// Add a new text node
811	if (text != null)
812	{
813	element.appendChild(element.getOwnerDocument().createTextNode(text));
814	}
815	}
816
817	/**
818	* Set the #text node value of some element.
819	*
820	* @param element
821	* the Element whose value we wish to set
822	* @param value
823	* the new value for the element as a String Soon to be
824	* deprecated!
825	*/
826	static final public void setValue(Element element, String value)
827	{
828	// Remove any existing child node(s)
829	clear(element);
830	// Add new text node.
831	if (value != null)
832	{
833	element.appendChild(element.getOwnerDocument().createTextNode(value));
834	}
835	}
836
837	static public void indentXML(Element elem, int depth)
838	{
839	Document doc = elem.getOwnerDocument();
840
841	String startIndentString = "\n";
842	for (int i = 0; i < depth; i++)
843	{
844	startIndentString += "\t";
845	}
846	Node startTextNode = doc.createTextNode(startIndentString);
847
848	String endIndentString = "\n";
849	for (int i = 0; i < depth - 1; i++)
850	{
851	endIndentString += "\t";
852	}
853	Node endTextNode = doc.createTextNode(endIndentString);
854
855	boolean found = false;
856	Node child = elem.getFirstChild();
857	while (child != null)
858	{
859	// first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
860	if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
861	{
862	Node spaceTextNode = child;
863	child = child.getNextSibling();
864	elem.removeChild(spaceTextNode);
865
866	if(child == null) break;
867	}
868
869	// now process normal element nodes as intended
870	if (child.getNodeType() == Node.ELEMENT_NODE)
871	{
872	found = true;
873	break;
874	}
875	child = child.getNextSibling();
876	}
877
878	if (found)
879	{
880	elem.appendChild(endTextNode);
881	}
882
883	child = elem.getFirstChild();
884	while (child != null)
885	{
886	// Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
887	// because the first while loop above would break out when it found an element node and wouldn't have got rid
888	// of all the empty text nodes yet.
889	// This time, beware not to delete the special end and start empty textnodes just added, since
890	// they've been created and inserted specifically.
891	if(child != endTextNode && child != startTextNode
892	&& child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
893	{
894	Node spaceTextNode = child;
895	child = child.getNextSibling();
896	elem.removeChild(spaceTextNode);
897
898	if(child == null) break;
899	}
900
901	// go back to processing normal element nodes as intended
902	if (child.getNodeType() == Node.ELEMENT_NODE)
903	{
904	elem.insertBefore(startTextNode.cloneNode(false), child);
905	indentXML((Element) child, depth + 1);
906	}
907	child = child.getNextSibling();
908	}
909	}
910
911	/**
912	* Write an XML document to a given file with the text node of the specified
913	* element unescaped
914	*/
915	static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
916	{
917	indentXML(document.getDocumentElement(), 1);
918	try
919	{
920	OutputStream os = new FileOutputStream(xml_file);
921	// Create an output format for our document.
922	OutputFormat f = new OutputFormat(document);
923	f.setEncoding("UTF-8");
924	f.setIndenting(true);
925	f.setLineWidth(0); // Why isn't this working!
926	f.setPreserveSpace(true);
927	if (nonEscapingTagNames != null)
928	{
929	f.setNonEscapingElements(nonEscapingTagNames);
930	}
931	// Create the necessary writer stream for serialization.
932	OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
933	Writer w = new BufferedWriter(osw);
934	// Generate a new serializer from the above.
935	XMLSerializer s = new XMLSerializer(w, f);
936	s.asDOMSerializer();
937	// Finally serialize the document to file.
938	s.serialize(document);
939	// And close.
940	os.close();
941	}
942	catch (Exception exception)
943	{
944	DebugStream.printStackTrace(exception);
945	}
946	}
947
948	/** Write an XML document to a given file */
949	static public void writeXMLFile(File xml_file, Document document)
950	{
951	writeXMLFile(xml_file, document, null);
952	}
953
954	public static void printXMLNode(Node e)
955	{
956	printXMLNode(e, 0);
957	}
958
959	public static void printXMLNode(Node e, int depth)
960	{ //recursive method call using DOM API...
961
962	for (int i = 0; i < depth; i++)
963	System.out.print(' ');
964
965	if (e.getNodeType() == Node.TEXT_NODE)
966	{
967	//System.out.println("text") ;
968	if (e.getNodeValue() != "")
969	{
970	System.out.println(e.getNodeValue());
971	}
972	return;
973	}
974
975	System.out.print('<');
976	System.out.print(e.getNodeName());
977	NamedNodeMap attrs = e.getAttributes();
978	if (attrs != null)
979	{
980	for (int i = 0; i < attrs.getLength(); i++)
981	{
982	Node attr = attrs.item(i);
983	System.out.print(' ');
984	System.out.print(attr.getNodeName());
985	System.out.print("=\"");
986	System.out.print(attr.getNodeValue());
987	System.out.print('"');
988	}
989	}
990	NodeList children = e.getChildNodes();
991
992	if (children == null \|\| children.getLength() == 0)
993	System.out.println("/>");
994	else
995	{
996
997	System.out.println('>');
998
999	int len = children.getLength();
1000	for (int i = 0; i < len; i++)
1001	{
1002	printXMLNode(children.item(i), depth + 1);
1003	}
1004
1005	for (int i = 0; i < depth; i++)
1006	System.out.print(' ');
1007
1008	System.out.println("</" + e.getNodeName() + ">");
1009	}
1010
1011	}
1012
1013	public static String xmlNodeToString(Node e)
1014	{
1015	StringBuffer sb = new StringBuffer("");
1016	xmlNodeToString(sb, e, true, "\t", 2);
1017	return sb.toString();
1018	}
1019
1020	public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
1021	{
1022
1023	if (e.getNodeType() == Node.CDATA_SECTION_NODE)
1024	{
1025	if (e.getNodeValue() != "")
1026	{
1027	String text = e.getNodeValue();
1028	sb.append("<![CDATA[");
1029	sb.append(text);
1030	sb.append("]]>");
1031	}
1032	return;
1033	}
1034
1035	if (e.getNodeType() == Node.TEXT_NODE)
1036	{
1037	if (e.getNodeValue() != "")
1038	{
1039	String text = e.getNodeValue();
1040	text = text.replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">").replaceAll("[\\n\\r\\t\\s]*$", "");
1041	for (Character c : text.toCharArray())
1042	{
1043	if (c.equals('\n'))
1044	{
1045	text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
1046	break;
1047	}
1048
1049	if (!Character.isWhitespace(c))
1050	{
1051	break;
1052	}
1053	}
1054	sb.append(text);
1055	}
1056	return;
1057	}
1058
1059	if (e.getNodeType() == Node.COMMENT_NODE)
1060	{
1061	if (e.getNodeValue() != "")
1062	{
1063	sb.append("<!--\n" + e.getNodeValue().trim() + "\n-->\n");
1064	}
1065	return;
1066	}
1067
1068	if (indent)
1069	{
1070	for (int i = 0; i < depth; i++)
1071	{
1072	sb.append(indentString);
1073	}
1074	}
1075
1076	sb.append('<');
1077	sb.append(e.getNodeName());
1078	NamedNodeMap attrs = e.getAttributes();
1079	if (attrs != null)
1080	{
1081	for (int i = 0; i < attrs.getLength(); i++)
1082	{
1083	Node attr = attrs.item(i);
1084	sb.append(' ');
1085	sb.append(attr.getNodeName());
1086	sb.append("=\"");
1087	sb.append(attr.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">"));
1088	sb.append('"');
1089	}
1090	}
1091	NodeList children = e.getChildNodes();
1092
1093	boolean hasElements = false;
1094	boolean indentSwapped = false;
1095	for (int i = 0; i < children.getLength(); i++)
1096	{
1097	if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1098	{
1099	hasElements = true;
1100	}
1101	if ((children.item(i).getNodeType() == Node.TEXT_NODE \|\| children.item(i).getNodeType() == Node.CDATA_SECTION_NODE) && indent)
1102	{
1103	if (children.item(i).getNodeValue().trim().length() > 0)
1104	{
1105	indentSwapped = true;
1106	indent = false;
1107	}
1108	}
1109	}
1110
1111	if (children == null \|\| children.getLength() == 0)
1112	{
1113	sb.append("/>");
1114
1115	if (indent)
1116	{
1117	sb.append("\n");
1118	}
1119	}
1120	else
1121	{
1122	sb.append(">");
1123	if (hasElements && indent)
1124	{
1125	sb.append("\n");
1126	}
1127
1128	int len = children.getLength();
1129	for (int i = 0; i < len; i++)
1130	{
1131	xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1132	}
1133
1134	if (indent)
1135	{
1136	for (int i = 0; i < depth; i++)
1137	{
1138	sb.append(indentString);
1139	}
1140	}
1141
1142	sb.append("</" + e.getNodeName() + ">");
1143
1144	if ((hasElements && indent) \|\| indentSwapped)
1145	{
1146	sb.append("\n");
1147	}
1148	}
1149	}
1150
1151	public static String xmlNodeToStringWithoutIndenting(Node e)
1152	{
1153	StringBuffer sb = new StringBuffer("");
1154	xmlNodeToStringWithoutNewline(sb, e, -1);
1155	return sb.toString();
1156	}
1157
1158	public static String xmlNodeToStringWithoutNewline(Node e)
1159	{
1160	StringBuffer sb = new StringBuffer("");
1161	xmlNodeToStringWithoutNewline(sb, e, 0);
1162	return sb.toString();
1163	}
1164
1165	private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1166	{
1167
1168	for (int i = 0; i < depth; i++)
1169	{
1170	sb.append(' ');
1171	}
1172
1173	if (e.getNodeType() == Node.TEXT_NODE)
1174	{
1175	if (e.getNodeValue() != "")
1176	{
1177	sb.append(e.getNodeValue().replaceAll("&", "&").replaceAll("<", "<").replace(">", ">"));
1178	}
1179	return;
1180	}
1181
1182	if (e.getNodeType() == Node.COMMENT_NODE)
1183	{
1184	if (e.getNodeValue() != "")
1185	{
1186	sb.append("<!--" + e.getNodeValue() + "-->");
1187	}
1188	return;
1189	}
1190
1191	sb.append('<');
1192	sb.append(e.getNodeName());
1193	NamedNodeMap attrs = e.getAttributes();
1194	if (attrs != null)
1195	{
1196	for (int i = 0; i < attrs.getLength(); i++)
1197	{
1198	Node attr = attrs.item(i);
1199	sb.append(' ');
1200	sb.append(attr.getNodeName());
1201	sb.append("=\"");
1202	sb.append(attr.getNodeValue());
1203	sb.append('"');
1204	}
1205	}
1206	NodeList children = e.getChildNodes();
1207
1208	if (children == null \|\| children.getLength() == 0)
1209	sb.append("/>");
1210	else
1211	{
1212
1213	sb.append(">");
1214
1215	int len = children.getLength();
1216	for (int i = 0; i < len; i++)
1217	{
1218	if (depth >= 0)
1219	{
1220	xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1221	}
1222	else
1223	{
1224	xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1225	}
1226	}
1227
1228	for (int i = 0; i < depth; i++)
1229	sb.append(' ');
1230
1231	sb.append("</" + e.getNodeName() + ">");
1232	}
1233	}
1234
1235
1236
1237	// This method will convert an Element to a String too, like xmlNodeToString() above.
1238	// But for a document root element (doc.getDocumentElement()), this method will additionally
1239	// return its processing instruction line at the start (<?xml ... ?>).
1240	// This method copied into GLI from src/java/org/greenstone/gsdl3/util/GSXML.java
1241	public static String elementToString(Element e, boolean indent)
1242	{
1243	String str = "";
1244	try
1245	{
1246	TransformerFactory tf = TransformerFactory.newInstance();
1247	Transformer trans = tf.newTransformer();
1248	StringWriter sw = new StringWriter();
1249	if (indent)
1250	{
1251	trans.setOutputProperty(OutputKeys.INDENT, "yes");
1252	}
1253	else
1254	{
1255	trans.setOutputProperty(OutputKeys.INDENT, "no");
1256	}
1257	trans.transform(new DOMSource(e), new StreamResult(sw));
1258	str = sw.toString();
1259	}
1260	catch (Exception ex)
1261	{
1262	str += "Exception: couldn't write " + e + " to log";
1263	}
1264	finally
1265	{
1266	return str;
1267	}
1268	}
1269	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: other-projects/FileTransfer-WebSocketPair/testGXTWithGreenstone/src/org/greenstone/gatherer/util/XMLTools.java@ 33053

Download in other formats: