source: other-projects/FileTransfer-WebSocketPair/testGXTWithGreenstone/src/org/greenstone/gatherer/util/XMLTools.java@ 33053

Last change on this file since 33053 was 33053, checked in by ak19, 5 years ago

I still had some of Nathan Kelly's work (FileTransfer-WebSocketPair) sitting on my USB. I had already committed the Themes folder at the time, 2 years back. I am not sure whether he wanted this additional folder committed, but I didn't want to delete it and decided it would be better off on SVN. When we use his project, if we find we don't need this test folder, we can remove it from SVN then.

File size: 33.4 KB
Line 
1package org.greenstone.gatherer.util;
2
3import java.io.*;
4import java.net.*;
5import java.util.*;
6import org.apache.xerces.parsers.*;
7import org.apache.xml.serialize.*;
8import org.greenstone.gatherer.DebugStream;
9import org.w3c.dom.*;
10import org.xml.sax.*;
11
12import java.io.FileReader;
13import java.io.IOException;
14import java.io.StringReader;
15import java.io.StringWriter; // for elementToString()
16
17// SAX
18import org.xml.sax.XMLReader;
19import org.xml.sax.SAXException;
20import org.xml.sax.SAXParseException;
21import org.xml.sax.helpers.DefaultHandler;
22import org.xml.sax.InputSource;
23
24// JAXP
25import javax.xml.parsers.DocumentBuilder;
26import javax.xml.parsers.DocumentBuilderFactory;
27import javax.xml.parsers.FactoryConfigurationError;
28import javax.xml.parsers.ParserConfigurationException;
29import javax.xml.parsers.SAXParser;
30import javax.xml.parsers.SAXParserFactory;
31// for elementToString():
32import javax.xml.transform.OutputKeys;
33import javax.xml.transform.Transformer;
34import javax.xml.transform.TransformerFactory;
35import javax.xml.transform.dom.DOMSource;
36import javax.xml.transform.stream.StreamResult;
37
38
39/** This class is a static class containing useful XML functions */
40public class XMLTools
41{
42 /** extracts the text out of a node */
43 public static Node getNodeTextNode(Element param)
44 {
45 param.normalize();
46 Node n = param.getFirstChild();
47 while (n != null && n.getNodeType() != Node.TEXT_NODE)
48 {
49 n = n.getNextSibling();
50 }
51 return n;
52 }
53
54 /** extracts the text out of a node */
55 public static String getNodeText(Element param)
56 {
57 Node text_node = getNodeTextNode(param);
58 if (text_node == null)
59 {
60 return "";
61 }
62 return text_node.getNodeValue();
63 }
64
65 public static void setNodeText(Element elem, String text)
66 {
67 Node old_text_node = getNodeTextNode(elem);
68 if (old_text_node != null)
69 {
70 elem.removeChild(old_text_node);
71 }
72 Text t = elem.getOwnerDocument().createTextNode(text);
73 elem.appendChild(t);
74 }
75
76 /** returns the (first) child element with the given name */
77 public static Node getChildByTagName(Node n, String name)
78 {
79
80 Node child = n.getFirstChild();
81 while (child != null)
82 {
83 if (child.getNodeName().equals(name))
84 {
85 return child;
86 }
87 child = child.getNextSibling();
88 }
89 return null; //not found
90 }
91
92 /**
93 * returns the (nth) child element with the given name index numbers start
94 * at 0
95 */
96 public static Node getChildByTagNameIndexed(Node n, String name, int index)
97 {
98 if (index == -1)
99 {
100 return getChildByTagName(n, name);
101 }
102 int count = 0;
103 Node child = n.getFirstChild();
104 while (child != null)
105 {
106 if (child.getNodeName().equals(name))
107 {
108 if (count == index)
109 {
110 return child;
111 }
112 else
113 {
114 count++;
115 }
116 }
117 child = child.getNextSibling();
118 }
119 return null; //not found
120 }
121
122 /**
123 * returns the element parent/node_name[@attribute_name='attribute_value']
124 */
125 public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
126 {
127
128 NodeList children = parent.getChildNodes();
129 for (int i = 0; i < children.getLength(); i++)
130 {
131 Node child = children.item(i);
132 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
133 if (child.getNodeName().equals(node_name))
134 {
135 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
136 return (Element) child;
137 }
138 }
139 // not found
140 return null;
141 }
142
143 /**
144 * returns a list of elements
145 * parent/node_name[@attribute_name='attribute_value']
146 */
147 public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
148 {
149 ArrayList elements = new ArrayList();
150 NodeList children = parent.getChildNodes();
151 for (int i = 0; i < children.getLength(); i++)
152 {
153 //System.out.println("getNamedElementList");
154 Node child = children.item(i);
155 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
156 if (child.getNodeName().equals(node_name))
157 {
158 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
159 elements.add((Element) child);
160 }
161 }
162 // not found
163 if (elements.size() == 0)
164 {
165 elements = null;
166 }
167 return elements;
168 }
169
170 public static void copyAllChildren(Element to, Element from)
171 {
172
173 Document to_doc = to.getOwnerDocument();
174 Node child = from.getFirstChild();
175 while (child != null)
176 {
177 to.appendChild(to_doc.importNode(child, true));
178 child = child.getNextSibling();
179 }
180 }
181
182 /** duplicates all elements in list elements and appends to toElement */
183 public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes) {
184 int num_elems = elements.getLength();
185 if (num_elems < 1)
186 {
187 return;
188 }
189 for (int i = 0; i < num_elems; i++)
190 {
191 Element to_element = XMLTools.duplicateElement(owner, (Element) elements.item(i), with_attributes);
192 toElement.appendChild(to_element);
193 }
194
195 }
196 /** Duplicates an element */
197 public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
198 {
199 return duplicateElementNS(owner, element, null, with_attributes);
200 }
201
202 /** Duplicates an element */
203 public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
204 {
205 Element duplicate;
206 if (namespace_uri == null)
207 {
208 duplicate = owner.createElement(element.getTagName());
209 }
210 else
211 {
212 duplicate = owner.createElementNS(namespace_uri, element.getTagName());
213 }
214 // Copy element attributes
215 if (with_attributes)
216 {
217 NamedNodeMap attributes = element.getAttributes();
218 for (int i = 0; i < attributes.getLength(); i++)
219 {
220 Node attribute = attributes.item(i);
221 duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
222 }
223 }
224
225 // Copy element children
226 NodeList children = element.getChildNodes();
227 for (int i = 0; i < children.getLength(); i++)
228 {
229 Node child = children.item(i);
230 duplicate.appendChild(owner.importNode(child, true));
231 }
232
233 return duplicate;
234 }
235
236 /** Remove all of the child nodes from a certain node. */
237 static final public void clear(Node node)
238 {
239 while (node.hasChildNodes())
240 {
241 node.removeChild(node.getFirstChild());
242 }
243 }
244
245 static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
246 {
247 ArrayList child_elements = new ArrayList();
248
249 NodeList children_nodelist = parent_element.getChildNodes();
250 for (int i = 0; i < children_nodelist.getLength(); i++)
251 {
252 Node child_node = children_nodelist.item(i);
253 if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
254 {
255 child_elements.add(child_node);
256 }
257 }
258
259 return child_elements;
260 }
261
262 static public String getElementTextValue(Element element)
263 {
264 // Find the first text node child
265 NodeList children_nodelist = element.getChildNodes();
266 for (int i = 0; i < children_nodelist.getLength(); i++)
267 {
268 Node child_node = children_nodelist.item(i);
269 if (child_node.getNodeType() == Node.TEXT_NODE)
270 {
271 return child_node.getNodeValue();
272 }
273 }
274
275 // None found
276 return "";
277 }
278
279 /**
280 * Method to retrieve the value of a given node.
281 *
282 * @param element
283 * The <strong>Element</strong> whose value we wish to find. Soon
284 * to be deprecated!
285 */
286 static final public String getValue(Node element)
287 {
288 if (element == null)
289 {
290 return "";
291 }
292 // If we've been given a subject node first retrieve its value node.
293 if (element.getNodeName().equals("Subject"))
294 {
295 element = getNodeFromNamed(element, "Value");
296 }
297 // If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
298 if (element != null && element.hasChildNodes())
299 {
300 StringBuffer text_buffer = new StringBuffer();
301 NodeList text_nodes = element.getChildNodes();
302 for (int i = 0; i < text_nodes.getLength(); i++)
303 {
304 Node possible_text = text_nodes.item(i);
305 if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
306 {
307 text_buffer.append(possible_text.getNodeValue());
308 }
309 }
310 return text_buffer.toString();
311 }
312 return "";
313 }
314
315 /**
316 * Method to retrieve from the node given, a certain child node with the
317 * specified name.
318 *
319 * @param parent
320 * The <strong>Node</strong> whose children should be searched.
321 * @param name
322 * The required nodes name as a <strong>String</strong>.
323 * @return The requested <strong>Node</strong> if it is found, <i>null</i>
324 * otherwise. Soon to be deprecated!
325 */
326 static final public Node getNodeFromNamed(Node parent, String name)
327 {
328 Node child = null;
329 for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
330 {
331 if (i.getNodeName().equals(name))
332 {
333 child = i;
334 }
335 }
336 return child;
337 }
338
/** Message the parse methods of this class start from and return when parsing raised no exception. */
static final public String WELLFORMED = "well-formed !";
/** Prefix of the message the parse methods return when a SAXParseException is caught; location and parser message are appended to it. */
static final public String NOTWELLFORMED = "not well-formed";
/** XML declaration plus an opening collectionConfig tag (declaring the gsf, gslib and xsl namespace prefixes) that parse(String) puts in front of its input before parsing. */
static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:gslib='http://www.greenstone.org/skinning' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
/** Closing collectionConfig tag that parse(String) appends to its input, matching the tag opened by HEADER. */
static final private String FOOTER = "</collectionConfig>";
343
344
345 public static Document getDOM(String xml_str)
346 {
347 Document doc = null;
348 try {
349
350 DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
351 InputSource is = new InputSource();
352 is.setCharacterStream(new StringReader(xml_str));
353 doc = db.parse(is);
354
355 } catch (Exception e) {
356 e.printStackTrace();
357 }
358 return doc;
359 }
360
361 public static String parse(String xml_str)
362 {
363 String validation_msg = WELLFORMED;
364 xml_str = HEADER + xml_str + FOOTER;
365 try
366 {
367 SAXParserFactory factory = SAXParserFactory.newInstance();
368 factory.setNamespaceAware(true);
369 //factory.setValidating (true);
370 SAXParser parser = factory.newSAXParser();
371 InputSource iSource = new InputSource(new StringReader(xml_str));
372 // parser.parse (iSource, new DefaultHandler ());
373
374 org.xml.sax.XMLReader reader = parser.getXMLReader();
375 reader.setContentHandler(new DefaultHandler());
376 reader.setErrorHandler(new DefaultHandler());
377 reader.parse(iSource);
378 }
379 catch (FactoryConfigurationError e)
380 {
381 validation_msg = "unable to get a document builder factory";
382 }
383 catch (ParserConfigurationException e)
384 {
385 validation_msg = "unable to configure parser";
386 }
387 catch (SAXParseException e)
388 {
389 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
390 }
391 catch (SAXException e)
392 {
393 validation_msg += " Fatal error: " + e.toString();
394 }
395 catch (IOException e)
396 {
397 validation_msg = "Unable to read the input, i/o error";
398 }
399
400 return validation_msg;
401 }
402
403 //In this method, the parsed string xml_str is not wrapped by the header and footer strings.
404 public static String parseDOM(String xml_str)
405 {
406 String validation_msg = WELLFORMED;
407
408 try
409 {
410 SAXParserFactory factory = SAXParserFactory.newInstance();
411 factory.setNamespaceAware(true);
412 //factory.setValidating (true);
413 SAXParser parser = factory.newSAXParser();
414 InputSource iSource = new InputSource(new StringReader(xml_str));
415 // parser.parse (iSource, new DefaultHandler ());
416
417 org.xml.sax.XMLReader reader = parser.getXMLReader();
418 reader.setContentHandler(new DefaultHandler());
419 reader.setErrorHandler(new DefaultHandler());
420 reader.parse(iSource);
421 }
422 catch (FactoryConfigurationError e)
423 {
424 validation_msg = "unable to get a document builder factory";
425 }
426 catch (ParserConfigurationException e)
427 {
428 validation_msg = "unable to configure parser";
429 }
430 catch (SAXParseException e)
431 {
432 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
433 }
434 catch (SAXException e)
435 {
436 validation_msg += " " + e.toString();
437 }
438 catch (IOException e)
439 {
440 validation_msg = "Unable to read the input, i/o error";
441 }
442
443 return validation_msg;
444 }
445
446 public static String parse(File xml_file)
447 {
448 String validation_msg = WELLFORMED;
449
450 try
451 {
452 SAXParserFactory factory = SAXParserFactory.newInstance();
453 factory.setNamespaceAware(true);
454 //factory.setValidating (true);
455 SAXParser parser = factory.newSAXParser();
456 FileReader r = new FileReader(xml_file);
457 InputSource iSource = new InputSource(r);
458 XMLReader reader = parser.getXMLReader();
459 reader.setContentHandler(new DefaultHandler());
460 reader.setErrorHandler(new DefaultHandler());
461 reader.parse(iSource);
462 }
463 catch (FactoryConfigurationError e)
464 {
465 validation_msg = "unable to get a document builder factory";
466 }
467 catch (ParserConfigurationException e)
468 {
469 validation_msg = "unable to configure parser";
470 }
471 catch (SAXParseException e)
472 {
473 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
474 }
475 catch (SAXException e)
476 {
477 validation_msg += " Fatal error: " + e.toString();
478 }
479 catch (IOException e)
480 {
481 validation_msg = "Unable to read the input, i/o error";
482 }
483
484 return validation_msg;
485 }
486
487 /** Returns a string of the location. */
488 private static String getLocationString(SAXParseException ex)
489 {
490 StringBuffer str = new StringBuffer();
491
492 String systemId = ex.getSystemId();
493 if (systemId != null)
494 {
495 int index = systemId.lastIndexOf('/');
496 if (index != -1)
497 systemId = systemId.substring(index + 1);
498 str.append(systemId);
499 }
500 str.append("(line ");
501 str.append(ex.getLineNumber() - 1);
502 str.append(", column ");
503 str.append(ex.getColumnNumber());
504 str.append("): ");
505
506 return str.toString();
507
508 } // getLocationString(SAXParseException):String
509
510 /** Parse an XML document from a given file path */
511 static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
512 {
513 if (use_class_loader == true)
514 {
515 InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
516 if (is != null)
517 {
518 return parseXML(is);
519 }
520 }
521
522 // Try the file outside the classes directory
523 return parseXMLFile(new File(xml_file_path));
524 }
525
526 /** Parse an XML document from a given file */
527 static public Document parseXMLFile(File xml_file)
528 {
529 // No file? No point trying!
530 if (xml_file.exists() == false)
531 {
532 // System.err.println("@@@ file " + xml_file + " does not exist.");
533 return null;
534 }
535
536 try
537 {
538 return parseXML(new FileInputStream(xml_file));
539 }
540 catch (Exception exception)
541 {
542 DebugStream.printStackTrace(exception);
543 return null;
544 }
545 }
546
547 /** Parse an XML document from a given input stream */
548 static public Document parseXML(InputStream xml_input_stream)
549 {
550 Document document = null;
551
552 try
553 {
554 InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
555 document = parseXML(isr);
556 isr.close();
557 xml_input_stream.close();
558 }
559 catch (Exception exception)
560 {
561 DebugStream.printStackTrace(exception);
562 }
563
564 return document;
565 }
566
/**
 * Parse an XML document from a given reader, using the Xerces DOMParser
 * with validation and external DTD loading switched off.
 * <p>
 * How the input is prepared depends on DebugStream.isDebuggingEnabled():
 * when debugging is off the reader is wrapped in a
 * RemoveContentBeforeRootElementXMLReader; when debugging is on the whole
 * input is first read into a string (and the passed reader is closed) so
 * the text can be shown if parsing fails.
 * <p>
 * Note that in debug mode a SAXException makes this method call
 * System.exit(-1).
 *
 * @param xml_reader the reader supplying the XML
 * @return the parsed document, or null if an exception was caught
 */
static public Document parseXML(Reader xml_reader)
{
    Document document = null;

    // If debugging, the following will store the XML contents to be parsed,
    // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on).
    // When debugging is off it stays the empty string.
    String xmlContents = "";

    try
    {
        Reader reader = null;

        // (1) By default, GLI will remove any contents preceding (and invalidating)
        // the XML and present these lines separately to the user
        if (!DebugStream.isDebuggingEnabled())
        {
            try
            {
                reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
            }
            catch (Exception e)
            {
                // reader stays null in this case; parsing is still attempted below
                System.err.println("Exception while wrapping the reader in parseXML(Reader)");
                e.printStackTrace();
            }
        }

        // (2) If we are running GLI in debug mode:
        // In case parsing exceptions are thrown (SAX Exceptions), we want to get some
        // idea of where things went wrong, so the XML contents are kept in xmlContents
        // for the exception handlers below to print.
        // We need to read the XML twice to know the line where things went wrong, so
        // do the additional reading only if we're debugging
        else
        {
            // Drain the incoming reader, 500 characters at a time, into one string
            StringBuffer buf = new StringBuffer();
            char[] buffer = new char[500];
            int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
            while (numCharsRead != -1)
            {
                buf.append(buffer, 0, numCharsRead);
                numCharsRead = xml_reader.read(buffer, 0, buffer.length);
            }
            xmlContents = buf.toString();
            xml_reader.close(); // closing the old Reader
            xml_reader = null;
            buffer = null;
            buf = null;
            // we need a Reader to parse the same contents as the Reader that was just closed
            reader = new BufferedReader(new StringReader(xmlContents));
            //System.err.println("xmlContents:\n" + xmlContents);
        }

        // (3) The actual XML parsing
        InputSource isc = new InputSource(reader);
        DOMParser parser = new DOMParser();
        parser.setFeature("http://xml.org/sax/features/validation", false);
        parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
        parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
        parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
        parser.setEntityResolver(new GLIEntityResolver());
        parser.parse(isc);
        document = parser.getDocument();

    }
    catch (SAXParseException e)
    {
        // Parse errors carry a line number; let the helper report the offending line
        showXMLParseFailureLine(e, xmlContents);
    }
    catch (SAXException exception)
    {
        System.err.println("SAX exception: " + exception.getMessage());
        if (DebugStream.isDebuggingEnabled())
        {
            DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
            // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
            DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
            System.exit(-1);
        }
        // else, not running in debug mode, so don't exit after exception
        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
        DebugStream.printStackTrace(exception);
    }
    catch (Exception exception)
    {
        DebugStream.printStackTrace(exception);
    }

    // Still null if any of the handlers above ran
    return document;
}
659
660 /**
661 * Displays the line (string) where the SAXParseException occurred, given a
662 * String of the entire xml that was being parsed and the SAXParseException
663 * object that was caught. The messages are printed to DebugStream, so run
664 * GLI/FLI with -debug to view this output.
665 *
666 * @param xmlContents
667 * is the entire xml that was being parsed when the exception
668 * occurred
669 * @param e
670 * is the SAXParseException object that was thrown upon parsing
671 * the xmlContents.
672 */
673 public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
674 {
675
676 // There should be no characters at all that preceed the <?xml>... bit.
677 // The first check is for starting spaces:
678 if (xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t"))
679 {
680 DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
681 DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
682 return; // nothing more to do, first error identified
683 }
684
685 // the actual line (String literal) where parsing failed and the SAXParseException occurred.
686 String line = "";
687 int linenumber = e.getLineNumber();
688 DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
689 if (DebugStream.isDebuggingEnabled())
690 {
691 if (linenumber != -1)
692 {
693 String[] lines = xmlContents.split("\n");
694 if (lines.length > 0)
695 {
696 DebugStream.println(" (number of lines: " + lines.length + ")");
697 if (lines.length >= linenumber)
698 {
699 line = lines[linenumber - 1];
700 }
701 else
702 { // error is past the last line
703 line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
704 }
705 }
706 else
707 {
708 DebugStream.print("\n");
709 }
710 lines = null;
711
712 DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
713 DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
714
715 // Uncomment if you want to print out the entire contents of the XML doc:
716 //DebugStream.println("\n\nThis was the XML:\n*********START\n"
717 // + xmlContents + "\n************END\n");
718 }
719 else
720 { // no particular line number, print out all the xml so debugger can inspect it
721 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
722 }
723 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
724 DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
725 System.exit(-1);
726 }
727 else
728 { // not running in debug mode
729 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
730 }
731 }
732
733 static public StringBuffer readXMLStream(InputStream input_stream)
734 {
735 StringBuffer xml = new StringBuffer("");
736
737 try
738 {
739 InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
740 BufferedReader buffered_in = new BufferedReader(isr);
741
742 String line = "";
743 boolean xml_content = false;
744 while ((line = buffered_in.readLine()) != null)
745 {
746 if (xml_content)
747 {
748 xml.append(line);
749 xml.append("\n");
750 }
751 else if (line.trim().startsWith("<?xml"))
752 {
753 xml_content = true;
754 xml.append(line);
755 xml.append("\n");
756 }
757 else
758 {
759 System.err.println(line);
760 }
761 }
762 buffered_in = null;
763 }
764 catch (Exception error)
765 {
766 System.err.println("Failed when trying to parse XML stream");
767 error.printStackTrace();
768 }
769
770 return xml;
771 }
772
773 /**
774 * Removes characters that are invalid in XML (see
775 * http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
776 */
777 static public String removeInvalidCharacters(String text)
778 {
779 char[] safe_characters = new char[text.length()];
780 int j = 0;
781
782 char[] raw_characters = new char[text.length()];
783 text.getChars(0, text.length(), raw_characters, 0);
784 for (int i = 0; i < raw_characters.length; i++)
785 {
786 char character = raw_characters[i];
787 if ((character >= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF))
788 {
789 safe_characters[j] = character;
790 j++;
791 }
792 }
793
794 return new String(safe_characters, 0, j);
795 }
796
797 static public void setElementTextValue(Element element, String text)
798 {
799 // Remove all text node children
800 NodeList children_nodelist = element.getChildNodes();
801 for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
802 {
803 Node child_node = children_nodelist.item(i);
804 if (child_node.getNodeType() == Node.TEXT_NODE)
805 {
806 element.removeChild(child_node);
807 }
808 }
809
810 // Add a new text node
811 if (text != null)
812 {
813 element.appendChild(element.getOwnerDocument().createTextNode(text));
814 }
815 }
816
817 /**
818 * Set the #text node value of some element.
819 *
820 * @param element
821 * the Element whose value we wish to set
822 * @param value
823 * the new value for the element as a String Soon to be
824 * deprecated!
825 */
826 static final public void setValue(Element element, String value)
827 {
828 // Remove any existing child node(s)
829 clear(element);
830 // Add new text node.
831 if (value != null)
832 {
833 element.appendChild(element.getOwnerDocument().createTextNode(value));
834 }
835 }
836
/**
 * Re-indents an element's subtree in place by inserting whitespace text
 * nodes: a newline plus {@code depth} tabs before each child element, and
 * (if the element has at least one child element) a newline plus
 * {@code depth - 1} tabs before its closing tag. Whitespace-only text
 * nodes met during the traversal are removed. Child elements are processed
 * recursively with {@code depth + 1}.
 *
 * @param elem  the element whose children are to be indented
 * @param depth the number of tabs to put in front of elem's child elements
 */
static public void indentXML(Element elem, int depth)
{
    Document doc = elem.getOwnerDocument();

    // Template for the text placed before each child element: newline + depth tabs
    String startIndentString = "\n";
    for (int i = 0; i < depth; i++)
    {
        startIndentString += "\t";
    }
    Node startTextNode = doc.createTextNode(startIndentString);

    // Text placed before elem's closing tag: newline + (depth - 1) tabs
    String endIndentString = "\n";
    for (int i = 0; i < depth - 1; i++)
    {
        endIndentString += "\t";
    }
    Node endTextNode = doc.createTextNode(endIndentString);

    // First pass: find out whether elem has any child element at all,
    // removing whitespace-only text nodes met on the way.
    boolean found = false;
    Node child = elem.getFirstChild();
    while (child != null)
    {
        // first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
        if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
        {
            // step to the next sibling before detaching the node
            Node spaceTextNode = child;
            child = child.getNextSibling();
            elem.removeChild(spaceTextNode);

            if(child == null) break;
        }

        // now process normal element nodes as intended
        if (child.getNodeType() == Node.ELEMENT_NODE)
        {
            found = true;
            break;
        }
        child = child.getNextSibling();
    }

    // Only elements that contain child elements get their closing tag on a new line
    if (found)
    {
        elem.appendChild(endTextNode);
    }

    // Second pass: put the start indent before every child element and recurse into it
    child = elem.getFirstChild();
    while (child != null)
    {
        // Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
        // because the first while loop above would break out when it found an element node and wouldn't have got rid
        // of all the empty text nodes yet.
        // This time, beware not to delete the special end and start empty textnodes just added, since
        // they've been created and inserted specifically.
        if(child != endTextNode && child != startTextNode
           && child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
        {
            Node spaceTextNode = child;
            child = child.getNextSibling();
            elem.removeChild(spaceTextNode);

            if(child == null) break;
        }

        // go back to processing normal element nodes as intended
        if (child.getNodeType() == Node.ELEMENT_NODE)
        {
            // a fresh copy of the template is inserted each time; startTextNode itself is never attached
            elem.insertBefore(startTextNode.cloneNode(false), child);
            indentXML((Element) child, depth + 1);
        }
        child = child.getNextSibling();
    }
}
910
911 /**
912 * Write an XML document to a given file with the text node of the specified
913 * element unescaped
914 */
915 static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
916 {
917 indentXML(document.getDocumentElement(), 1);
918 try
919 {
920 OutputStream os = new FileOutputStream(xml_file);
921 // Create an output format for our document.
922 OutputFormat f = new OutputFormat(document);
923 f.setEncoding("UTF-8");
924 f.setIndenting(true);
925 f.setLineWidth(0); // Why isn't this working!
926 f.setPreserveSpace(true);
927 if (nonEscapingTagNames != null)
928 {
929 f.setNonEscapingElements(nonEscapingTagNames);
930 }
931 // Create the necessary writer stream for serialization.
932 OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
933 Writer w = new BufferedWriter(osw);
934 // Generate a new serializer from the above.
935 XMLSerializer s = new XMLSerializer(w, f);
936 s.asDOMSerializer();
937 // Finally serialize the document to file.
938 s.serialize(document);
939 // And close.
940 os.close();
941 }
942 catch (Exception exception)
943 {
944 DebugStream.printStackTrace(exception);
945 }
946 }
947
948 /** Write an XML document to a given file */
949 static public void writeXMLFile(File xml_file, Document document)
950 {
951 writeXMLFile(xml_file, document, null);
952 }
953
954 public static void printXMLNode(Node e)
955 {
956 printXMLNode(e, 0);
957 }
958
959 public static void printXMLNode(Node e, int depth)
960 { //recursive method call using DOM API...
961
962 for (int i = 0; i < depth; i++)
963 System.out.print(' ');
964
965 if (e.getNodeType() == Node.TEXT_NODE)
966 {
967 //System.out.println("text") ;
968 if (e.getNodeValue() != "")
969 {
970 System.out.println(e.getNodeValue());
971 }
972 return;
973 }
974
975 System.out.print('<');
976 System.out.print(e.getNodeName());
977 NamedNodeMap attrs = e.getAttributes();
978 if (attrs != null)
979 {
980 for (int i = 0; i < attrs.getLength(); i++)
981 {
982 Node attr = attrs.item(i);
983 System.out.print(' ');
984 System.out.print(attr.getNodeName());
985 System.out.print("=\"");
986 System.out.print(attr.getNodeValue());
987 System.out.print('"');
988 }
989 }
990 NodeList children = e.getChildNodes();
991
992 if (children == null || children.getLength() == 0)
993 System.out.println("/>");
994 else
995 {
996
997 System.out.println('>');
998
999 int len = children.getLength();
1000 for (int i = 0; i < len; i++)
1001 {
1002 printXMLNode(children.item(i), depth + 1);
1003 }
1004
1005 for (int i = 0; i < depth; i++)
1006 System.out.print(' ');
1007
1008 System.out.println("</" + e.getNodeName() + ">");
1009 }
1010
1011 }
1012
1013 public static String xmlNodeToString(Node e)
1014 {
1015 StringBuffer sb = new StringBuffer("");
1016 xmlNodeToString(sb, e, true, "\t", 2);
1017 return sb.toString();
1018 }
1019
1020 public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
1021 {
1022
1023 if (e.getNodeType() == Node.CDATA_SECTION_NODE)
1024 {
1025 if (e.getNodeValue() != "")
1026 {
1027 String text = e.getNodeValue();
1028 sb.append("<![CDATA[");
1029 sb.append(text);
1030 sb.append("]]>");
1031 }
1032 return;
1033 }
1034
1035 if (e.getNodeType() == Node.TEXT_NODE)
1036 {
1037 if (e.getNodeValue() != "")
1038 {
1039 String text = e.getNodeValue();
1040 text = text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("[\\n\\r\\t\\s]*$", "");
1041 for (Character c : text.toCharArray())
1042 {
1043 if (c.equals('\n'))
1044 {
1045 text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
1046 break;
1047 }
1048
1049 if (!Character.isWhitespace(c))
1050 {
1051 break;
1052 }
1053 }
1054 sb.append(text);
1055 }
1056 return;
1057 }
1058
1059 if (e.getNodeType() == Node.COMMENT_NODE)
1060 {
1061 if (e.getNodeValue() != "")
1062 {
1063 sb.append("<!--\n" + e.getNodeValue().trim() + "\n-->\n");
1064 }
1065 return;
1066 }
1067
1068 if (indent)
1069 {
1070 for (int i = 0; i < depth; i++)
1071 {
1072 sb.append(indentString);
1073 }
1074 }
1075
1076 sb.append('<');
1077 sb.append(e.getNodeName());
1078 NamedNodeMap attrs = e.getAttributes();
1079 if (attrs != null)
1080 {
1081 for (int i = 0; i < attrs.getLength(); i++)
1082 {
1083 Node attr = attrs.item(i);
1084 sb.append(' ');
1085 sb.append(attr.getNodeName());
1086 sb.append("=\"");
1087 sb.append(attr.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;"));
1088 sb.append('"');
1089 }
1090 }
1091 NodeList children = e.getChildNodes();
1092
1093 boolean hasElements = false;
1094 boolean indentSwapped = false;
1095 for (int i = 0; i < children.getLength(); i++)
1096 {
1097 if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1098 {
1099 hasElements = true;
1100 }
1101 if ((children.item(i).getNodeType() == Node.TEXT_NODE || children.item(i).getNodeType() == Node.CDATA_SECTION_NODE) && indent)
1102 {
1103 if (children.item(i).getNodeValue().trim().length() > 0)
1104 {
1105 indentSwapped = true;
1106 indent = false;
1107 }
1108 }
1109 }
1110
1111 if (children == null || children.getLength() == 0)
1112 {
1113 sb.append("/>");
1114
1115 if (indent)
1116 {
1117 sb.append("\n");
1118 }
1119 }
1120 else
1121 {
1122 sb.append(">");
1123 if (hasElements && indent)
1124 {
1125 sb.append("\n");
1126 }
1127
1128 int len = children.getLength();
1129 for (int i = 0; i < len; i++)
1130 {
1131 xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1132 }
1133
1134 if (indent)
1135 {
1136 for (int i = 0; i < depth; i++)
1137 {
1138 sb.append(indentString);
1139 }
1140 }
1141
1142 sb.append("</" + e.getNodeName() + ">");
1143
1144 if ((hasElements && indent) || indentSwapped)
1145 {
1146 sb.append("\n");
1147 }
1148 }
1149 }
1150
1151 public static String xmlNodeToStringWithoutIndenting(Node e)
1152 {
1153 StringBuffer sb = new StringBuffer("");
1154 xmlNodeToStringWithoutNewline(sb, e, -1);
1155 return sb.toString();
1156 }
1157
1158 public static String xmlNodeToStringWithoutNewline(Node e)
1159 {
1160 StringBuffer sb = new StringBuffer("");
1161 xmlNodeToStringWithoutNewline(sb, e, 0);
1162 return sb.toString();
1163 }
1164
1165 private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1166 {
1167
1168 for (int i = 0; i < depth; i++)
1169 {
1170 sb.append(' ');
1171 }
1172
1173 if (e.getNodeType() == Node.TEXT_NODE)
1174 {
1175 if (e.getNodeValue() != "")
1176 {
1177 sb.append(e.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replace(">", "&gt;"));
1178 }
1179 return;
1180 }
1181
1182 if (e.getNodeType() == Node.COMMENT_NODE)
1183 {
1184 if (e.getNodeValue() != "")
1185 {
1186 sb.append("<!--" + e.getNodeValue() + "-->");
1187 }
1188 return;
1189 }
1190
1191 sb.append('<');
1192 sb.append(e.getNodeName());
1193 NamedNodeMap attrs = e.getAttributes();
1194 if (attrs != null)
1195 {
1196 for (int i = 0; i < attrs.getLength(); i++)
1197 {
1198 Node attr = attrs.item(i);
1199 sb.append(' ');
1200 sb.append(attr.getNodeName());
1201 sb.append("=\"");
1202 sb.append(attr.getNodeValue());
1203 sb.append('"');
1204 }
1205 }
1206 NodeList children = e.getChildNodes();
1207
1208 if (children == null || children.getLength() == 0)
1209 sb.append("/>");
1210 else
1211 {
1212
1213 sb.append(">");
1214
1215 int len = children.getLength();
1216 for (int i = 0; i < len; i++)
1217 {
1218 if (depth >= 0)
1219 {
1220 xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1221 }
1222 else
1223 {
1224 xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1225 }
1226 }
1227
1228 for (int i = 0; i < depth; i++)
1229 sb.append(' ');
1230
1231 sb.append("</" + e.getNodeName() + ">");
1232 }
1233 }
1234
1235
1236
1237 // This method will convert an Element to a String too, like xmlNodeToString() above.
1238 // But for a document root element (doc.getDocumentElement()), this method will additionally
1239 // return its processing instruction line at the start (<?xml ... ?>).
1240 // This method copied into GLI from src/java/org/greenstone/gsdl3/util/GSXML.java
1241 public static String elementToString(Element e, boolean indent)
1242 {
1243 String str = "";
1244 try
1245 {
1246 TransformerFactory tf = TransformerFactory.newInstance();
1247 Transformer trans = tf.newTransformer();
1248 StringWriter sw = new StringWriter();
1249 if (indent)
1250 {
1251 trans.setOutputProperty(OutputKeys.INDENT, "yes");
1252 }
1253 else
1254 {
1255 trans.setOutputProperty(OutputKeys.INDENT, "no");
1256 }
1257 trans.transform(new DOMSource(e), new StreamResult(sw));
1258 str = sw.toString();
1259 }
1260 catch (Exception ex)
1261 {
1262 str += "Exception: couldn't write " + e + " to log";
1263 }
1264 finally
1265 {
1266 return str;
1267 }
1268 }
1269}
Note: See TracBrowser for help on using the repository browser.