Context Navigation

source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/util/XMLConverter.java@ 30477

Last change on this file since 30477 was 30477, checked in by davidb, 8 years ago
Changes in the Java code to support the new approach taken to client-side XSLT (using Saxon-CE JS library in the browser -- see next commit). Also some better error reporting when processing XSLT files
Property svn:keywords set to `Author Date Id Revision`
File size: 17.7 KB

Line
1	/*
2	* XMLConverter.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.util;
20
21	// XML classes
22	import org.w3c.dom.DOMImplementation;
23	import org.w3c.dom.Document;
24	import org.w3c.dom.DocumentType;
25	import org.w3c.dom.Element;
26	import org.w3c.dom.Node;
27	import org.w3c.dom.NodeList;
28	import org.w3c.dom.NamedNodeMap;
29	import org.xml.sax.InputSource;
30	import org.xml.sax.EntityResolver;
31	import org.xml.sax.ErrorHandler;
32	import org.xml.sax.SAXParseException;
33	import org.xml.sax.SAXNotRecognizedException;
34	import org.xml.sax.SAXNotSupportedException;
35	import org.apache.xerces.parsers.DOMParser;
36	import org.apache.xerces.dom.DocumentImpl; // for new Documents
37	import org.apache.xerces.dom.DocumentTypeImpl;
38
39	// other java classes
40	import java.io.BufferedWriter;
41	import java.io.ByteArrayInputStream;
42	import java.io.FileWriter;
43	import java.io.InputStream;
44	import java.io.Reader;
45	import java.io.InputStreamReader;
46	import java.io.StringReader;
47	import java.io.File;
48	import java.io.FileInputStream;
49	import java.io.FileReader;
50	import java.util.regex.*;
51
52	import org.apache.log4j.*;
53
54	// Apache Commons
55	import org.apache.commons.lang3.*;
56
57	import java.util.*;
58	import java.lang.reflect.*;
59
60	/**
61	* XMLConverter - utility class for greenstone
62	*
63	* generates new Documents
64	* parses XML Strings into Documents, converts Nodes to Strings
65	* different parsers have different behaviour - can experiment in here
66	* at the moment we only use xerces
67	* all xerces specific code is in here
68	*/
69	public class XMLConverter
70	{
71
72	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.XMLConverter.class.getName());
73
/**
 * The no-args constructor. It performs no initialisation; every method
 * declared directly on XMLConverter is static, so an instance carries no state.
 */
public XMLConverter()
{

}
79
80	/** returns a DOM Document */
81	public static Document getDOM(String in)
82	{
83
84	try
85	{
86	Reader reader = new StringReader(in);
87	InputSource xml_source = new InputSource(reader);
88	Document doc = getDOM(xml_source, null);
89	reader.close();
90	return doc;
91
92	}
93	catch (Exception e)
94	{
95	logger.error(e.getMessage());
96	logger.error("Input string was:\n" + in);
97	e.printStackTrace();
98	}
99	return null;
100	}
101
102	/** returns a DOM Document */
103	public static Document getDOM(String in, String encoding)
104	{
105	try
106	{
107	InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream(in.getBytes(encoding)), encoding);
108	InputSource xml_source = new InputSource(reader);
109	Document doc = getDOM(xml_source, null);
110	reader.close();
111	return doc;
112
113	}
114	catch (Exception e)
115	{
116	logger.error(e.getMessage());
117	logger.error("Input string was:\n" + in);
118	e.printStackTrace();
119	}
120	return null;
121	}
122
123	/** returns a DOM Document */
124	public static Document getDOM(File in) {
125	try
126	{
127	FileReader reader = new FileReader(in);
128	InputSource xml_source = new InputSource(reader);
129	Document doc = getDOM(xml_source, null);
130	reader.close();
131	return doc;
132
133	}
134	catch (Exception e)
135	{
136	logger.error(e.getMessage(), e);
137	logger.error("File was:\n" + in.getPath());
138	e.printStackTrace();
139
140	}
141	return null;
142	}
143
144	public static Document getDOM(File in, String encoding) {
145	return getDOM(in, encoding, null);
146	}
147
148	/** returns a DOM document */
149	public static Document getDOM(File in, String encoding, EntityResolver er) {
150
151	try {
152
153
154	InputStreamReader isr = new InputStreamReader(new FileInputStream(in), encoding);
155	InputSource xml_source = new InputSource(isr);
156	Document doc = getDOM(xml_source, er);
157	isr.close();
158	return doc;
159
160	}
161	catch (Exception e)
162	{
163	logger.error(e.getMessage());
164	logger.error("File was:\n" + in.getPath());
165	e.printStackTrace();
166	}
167	return null;
168	}
169
170	public static Document getDOM(File in, EntityResolver er) {
171
172	try {
173	InputSource xml_source = new InputSource(new FileInputStream(in));
174	Document doc = getDOM(xml_source, er);
175	return doc;
176	}
177	catch (Exception e)
178	{
179	logger.error(e.getMessage());
180	logger.error("File was:\n" + in.getPath());
181	e.printStackTrace();
182	}
183	return null;
184	}
185
186	public static Document getDOM(InputSource source, EntityResolver er) {
187
188	try {
189	DOMParser parser = new DOMParser();
190	parser.setFeature("http://xml.org/sax/features/validation", false);
191	// don't try and load external DTD - no need if we are not validating, and may cause connection errors if a proxy is not set up.
192	parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
193	// a performance test showed that having this on lead to increased
194	// memory use for small-medium docs, and not much gain for large
195	// docs.
196	// http://www.sosnoski.com/opensrc/xmlbench/conclusions.html
197	parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
198	// add an errorhandler to the parser which will output messages on encountering fatal errors, errors and warnings when parsing
199	parser.setErrorHandler(new ParseErrorHandler());
200	if (er != null) {
201	parser.setEntityResolver(er);
202	}
203	parser.parse(source);
204
205	Document doc = parser.getDocument();
206	return doc;
207
208	} catch (Exception e) {
209
210	logger.error(e.getMessage());
211	logger.error("InputSource:\n");
212	e.printStackTrace();
213	}
214	return null;
215
216	}
217
218	/** creates a new empty DOM Document */
219	public static Document newDOM()
220	{
221	Document doc = new DocumentImpl();
222	return doc;
223	}
224
225	/**
226	* This method's parameters represent the parts of the Doctype of this
227	* Document that is to be created. For more info see
228	* http://xerces.apache.org
229	* /xerces-j/apiDocs/org/apache/xerces/dom/DocumentTypeImpl
230	* .html#DocumentTypeImpl
231	* (org.apache.xerces.dom.CoreDocumentImpl,%20java.lang.String)
232	*
233	* */
234	public static Document newDOM(String qualifiedName, String publicID, String systemID)
235	{
236	// create empty DOM document
237	DocumentImpl docImpl = new DocumentImpl();
238
239	// Need to use the document to create the docType for it
240	DocumentType myDocType = new DocumentTypeImpl(docImpl, qualifiedName, publicID, systemID);
241
242	// Although we have created the docType using the document, we need to still
243	// put it into the empty document we just created
244	try
245	{
246	docImpl.appendChild(myDocType);
247	}
248	catch (Exception e)
249	{
250	System.out.println("Could not append docType because: " + e);
251	}
252
253	// return the document containing a DocType
254	return docImpl;
255	}
256
257	/** returns the Node as a String */
258	public static String getString(Node xmlNode)
259	{
260	StringBuffer xmlRepresentation = new StringBuffer();
261	getString(xmlNode, xmlRepresentation, 0, false);
262	return xmlRepresentation.toString();
263	}
264
265	/**
266	* returns the node as a nicely formatted String - this introduces extra
267	* text nodes if the String is read back in as a DOM, so should only be used
268	* for printing
269	*/
270	public static String getPrettyString(Node xmlNode)
271	{
272	StringBuffer xmlRepresentation = new StringBuffer();
273	getString(xmlNode, xmlRepresentation, 0, true);
274	return xmlRepresentation.toString();
275	}
276
277	/*
278	* For the purposes of logger.debug statements, where this is called and
279	* hence outputted, returns an empty string if debugging is not enabled
280	*/
281	public static String getPrettyStringLogger(Node xmlNode, Logger log)
282	{
283
284	if (log.isDebugEnabled())
285	return getPrettyString(xmlNode);
286
287	return "";
288
289	}
290
291	private static void getString(Node xmlNode, StringBuffer xmlRepresentation, int depth, boolean pretty)
292	{
293
294	if (xmlNode == null)
295	{
296	xmlRepresentation.append("<null>");
297	return;
298	}
299
300	short nodeType = xmlNode.getNodeType();
301	String nodeName = xmlNode.getNodeName();
302
303	if (nodeType == Node.DOCUMENT_NODE)
304	{
305	Document xmlDocNode = (Document) xmlNode;
306
307	//if (xmlDocNode.getDoctype() == null) {
308	//System.err.println("Doctype is null.");
309	//}
310	//else {
311	if (xmlDocNode.getDoctype() != null)
312	{
313	DocumentType dt = xmlDocNode.getDoctype();
314
315	String name = dt.getName();
316	String pid = dt.getPublicId();
317	String sid = dt.getSystemId();
318
319	// Use previously assigned name, not dt.getName() again
320	String doctype_str = "<!DOCTYPE " + name + " PUBLIC \"" + pid + "\" \"" + sid + "\">\n";
321
322	xmlRepresentation.append(doctype_str);
323	}
324	getString(xmlDocNode.getDocumentElement(), xmlRepresentation, depth, pretty);
325	return;
326	}
327	// Handle Element nodes
328	if (nodeType == Node.ELEMENT_NODE)
329	{
330	if (pretty)
331	{
332	xmlRepresentation.append("\n");
333	for (int i = 0; i < depth; i++)
334	{
335	xmlRepresentation.append(" ");
336	}
337	}
338
339	// Write opening tag
340	xmlRepresentation.append("<");
341	xmlRepresentation.append(nodeName);
342
343	// Write the node attributes
344	NamedNodeMap nodeAttributes = xmlNode.getAttributes();
345	for (int i = 0; i < nodeAttributes.getLength(); i++)
346	{
347	Node attribute = nodeAttributes.item(i);
348	xmlRepresentation.append(" ");
349	xmlRepresentation.append(attribute.getNodeName());
350	xmlRepresentation.append("=\"");
351	String attr_val = attribute.getNodeValue();
352
353	attr_val = attr_val.replaceAll("&","&");
354	attr_val = attr_val.replaceAll("<","<");
355	attr_val = attr_val.replaceAll(">",">");
356	attr_val = attr_val.replaceAll("\"",""");
357	// assume that any of the above chars that was already entity escaped
358	// was already correct => return back to how they were
359	attr_val = attr_val.replaceAll("&amp;","&");
360	attr_val = attr_val.replaceAll("&lt;","<");
361	attr_val = attr_val.replaceAll("&gt;",">");
362	attr_val = attr_val.replaceAll("&quot;",""");
363
364
365	xmlRepresentation.append(attr_val);
366	xmlRepresentation.append("\"");
367	}
368
369	// If the node has no children, close the opening tag and return
370	if (xmlNode.hasChildNodes() == false)
371	{
372	// This produces somewhat ugly output, but it is necessary to compensate
373	// for display bugs in Netscape. Firstly, the space is needed before the
374	// closing bracket otherwise Netscape will ignore some tags (<br/>, for
375	// example). Also, a newline character would be expected after the tag,
376	// but this causes problems with the display of links (the link text
377	// will contain a newline character, which is displayed badly).
378	xmlRepresentation.append(" />");
379	return;
380	}
381
382	// Close the opening tag
383	xmlRepresentation.append(">");
384
385	// Process the children. We process text nodes here, but recursively process other nodes.
386	// hack for nodes next to text nodes - dont make them pretty, ie don'e do any new lines or indenting
387	// Usually text nodes will be inside their own element. Sometimes we have eg span tags next to text nodes - don't want those indented.
388	// also if these are inside a pre tag then the space shows up in the page.
389
390	NodeList children = xmlNode.getChildNodes();
391	boolean do_pretty = pretty;
392	boolean output_escaping = true; // record if we have encountered a disable-output-escaping instruction
393	for (int i = 0; i < children.getLength(); i++)
394	{
395	Node child = children.item(i);
396	short child_type = child.getNodeType();
397	if (child_type == Node.PROCESSING_INSTRUCTION_NODE) {
398	if (child.getNodeName().equals("javax.xml.transform.disable-output-escaping")) {
399	output_escaping = false;
400	}
401	else if (child.getNodeName().equals("javax.xml.transform.enable-output-escaping")) {
402	output_escaping = true;
403	}
404	else {
405	logger.warn("Unhandled processing instruction " + child.getNodeName());
406	}
407	}
408	else if (child_type == Node.TEXT_NODE) {
409	do_pretty = false; // if there is a text node amongst the children, do all the following nodes in non-pretty mode - hope this doesn't stuff up something else
410	// output the text
411	String text = child.getNodeValue();
412
413	// Perform output escaping, if required
414	// Apache Commons replace method is far superior to String.replaceAll - very fast!
415	if (output_escaping) {
416	text = StringUtils.replace(text, "&", "&");
417	text = StringUtils.replace(text, "<", "<");
418	text = StringUtils.replace(text, ">", ">");
419	text = StringUtils.replace(text, "'", "'");
420	text = StringUtils.replace(text, "\"", """);
421	}
422	// Remove any control-C characters
423	text = StringUtils.replace(text, "" + (char) 3, "");
424
425	xmlRepresentation.append(text);
426
427	}
428	else {
429	// recursively call getString
430	getString(child, xmlRepresentation, depth + 1, do_pretty);
431	}
432	} // foreach child of the element
433
434	// Write closing tag
435	if (pretty)
436	{
437	if (xmlRepresentation.charAt(xmlRepresentation.length() - 1) == '\n')
438	{
439	for (int i = 0; i < depth; i++)
440	xmlRepresentation.append(" ");
441	}
442	}
443	xmlRepresentation.append("</");
444	xmlRepresentation.append(nodeName);
445	xmlRepresentation.append(">");
446	if (pretty)
447	{
448	xmlRepresentation.append("\n");
449	}
450	} // ELEMENT_NODE
451
452	else if (nodeType == Node.COMMENT_NODE)
453	{
454	String text = xmlNode.getNodeValue();
455	xmlRepresentation.append("<!-- ");
456	xmlRepresentation.append(text);
457	xmlRepresentation.append(" -->");
458	}
459
460	// TEXT and PROCESSING_INSTRUCTION nodes are handled inside their containing element node
461	// A type of node that is not handled yet
462	else
463	{
464	logger.warn("Unknown node type: " + nodeType + " " + getNodeTypeString(nodeType));
465	}
466
467	return;
468	}
469
470	protected static String getNodeTypeString(short node_type)
471	{
472
473	String type = "";
474	switch (node_type)
475	{
476	case Node.ATTRIBUTE_NODE:
477	type = "ATTRIBUTE_NODE";
478	break;
479	case Node.CDATA_SECTION_NODE:
480	type = "CDATA_SECTION_NODE";
481	break;
482	case Node.COMMENT_NODE:
483	type = "COMMENT_NODE";
484	break;
485	case Node.DOCUMENT_FRAGMENT_NODE:
486	type = "DOCUMENT_FRAGMENT_NODE";
487	break;
488	case Node.DOCUMENT_NODE:
489	type = "DOCUMENT_NODE";
490	break;
491	case Node.DOCUMENT_TYPE_NODE:
492	type = "DOCUMENT_TYPE_NODE";
493	break;
494	case Node.ELEMENT_NODE:
495	type = "ELEMENT_NODE";
496	break;
497	case Node.ENTITY_NODE:
498	type = "ENTITY_NODE";
499	break;
500	case Node.ENTITY_REFERENCE_NODE:
501	type = "ENTITY_REFERENCE_NODE";
502	break;
503	case Node.NOTATION_NODE:
504	type = "NOTATION_NODE";
505	break;
506	case Node.PROCESSING_INSTRUCTION_NODE:
507	type = "PROCESSING_INSTRUCTION_NODE";
508	break;
509	case Node.TEXT_NODE:
510	type = "TEXT_NODE";
511	break;
512	default:
513	type = "UNKNOWN";
514	}
515
516	return type;
517	}
518
519	// returns null if no error occurred during parsing, or else returns the error message
520
521	// public String getParseErrorMessage()
522	// {
523	// ParseErrorHandler errorHandler = (ParseErrorHandler) this.parser.getErrorHandler();
524	// return errorHandler.getErrorMessage();
525	// }
526
527	// Errorhandler for SAXParseExceptions that are errors, fatal errors or warnings. This class can be used to
528	// register a handler for any fatal errors, errors and warnings that may occur when parsing an xml file. The
529	// errors are printed both to the greenstone.log and to the tomcat console (System.err), and the error message
530	// is stored in the errorMessage variable so that it can be retrieved and be used to generate an xhtml error page.
531	static public class ParseErrorHandler implements ErrorHandler
532	{
533	protected String errorMessage = null;
534
535	// Receive notification of a recoverable error.
536	public void error(SAXParseException exception)
537	{
538	handleError("Error:\n", exception);
539	}
540
541	// Receive notification of a non-recoverable error.
542	public void fatalError(SAXParseException exception)
543	{
544	handleError("Fatal Error:\n", exception);
545	}
546
547	// Receive notification of a warning.
548	public void warning(SAXParseException exception)
549	{
550	handleError("Warning:\n", exception);
551	}
552
553	public String toString(SAXParseException e)
554	{
555	String msg = e.getMessage();
556	msg += "\nOn line(column): " + e.getLineNumber() + "(" + e.getColumnNumber() + ")";
557	msg += (e.getPublicId() != null) ? ("\npublic ID: " + e.getPublicId()) : "\nNo public ID";
558	msg += (e.getSystemId() != null) ? ("\nsystem ID: " + e.getSystemId()) : "\nNo system ID";
559
560	return msg;
561	}
562
563	// clears the errorPage variable after first call to this method
564	public String getErrorMessage()
565	{
566	String errMsg = this.errorMessage;
567	if (this.errorMessage != null)
568	{
569	this.errorMessage = null;
570	}
571	return errMsg;
572	}
573
574	// sets the errorMessage member variable to the data stored in the exception
575	// and writes the errorMessage to the logger and tomcat's System.err
576	protected void handleError(String errorType, SAXParseException exception)
577	{
578	this.errorMessage = errorType + toString(exception);
579	System.err.println("\n**Error parsing xml:\n" + this.errorMessage + "\n**\n");
580	logger.error(this.errorMessage);
581	}
582	}
583
584	public static boolean writeDOM(Element elem, File file) {
585
586	BufferedWriter writer = null;
587	boolean success = false;
588	try {
589	String xml_string = getString(elem);
590	// need createNewFile???
591	writer = new BufferedWriter(new FileWriter(file));
592	writer.write(xml_string);
593	success = true;
594	}
595
596	catch (Exception e) {
597	logger.error(e.getMessage());
598	success = false;
599	}
600	finally {
601	try {
602	if (writer != null) {
603	writer.close();
604	}
605	} catch(Exception e) {
606	logger.error("couldn't close the file"+e.getMessage());
607	}
608	}
609	return success;
610	}
611	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: