- Timestamp:
- 2009-01-12T11:17:33+13:00 (15 years ago)
- Location:
- gli/branches/rtl-gli/src/org/greenstone/gatherer/util
- Files:
-
- 2 edited
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
gli/branches/rtl-gli/src/org/greenstone/gatherer/util/StaticStrings.java
r14241 r18353 42 42 static final public String ACCENTFOLD_OPTION_STR = "accentfold"; 43 43 static final public String ALLFIELDS_STR = "allfields"; 44 static final public String ARCPLUG_STR = "Arc Plug";44 static final public String ARCPLUG_STR = "ArchivesInfPlugin"; 45 45 static final public String ASSIGNED_ATTRIBUTE = "assigned"; 46 46 static final public String ASSOCIATIONS_ELEMENT = "Associations"; … … 73 73 static final public String COLLECTIONMETADATA_PUBLIC_ELEMENT = "Public"; 74 74 static final public String COLLECTIONMETADATA_PUBLIC_STR = "public"; 75 static final public String COLLECTIONMETADATA_COLLECTGROUP_STR = "collectgroup"; 75 76 static final public String COLLECTIONMETADATA_STR = "collectionmeta"; 76 77 static final public String COLON_CHARACTER = ":"; … … 114 115 static final public String FORMAT_END_TAG = "</format>"; 115 116 static final public String FURTHER_DIALOG_INDICATOR = "..."; 117 static final public String FEDORA_MODE= "-fedora"; 118 static final public String FEDORA_HOME = "-fedora_home"; 119 static final public String FEDORA_VERSION = "-fedora_version"; 120 static final public String FEDORA_HOSTNAME = "-fedora_hostname"; 121 static final public String FEDORA_PORT = "-fedora_port"; 122 static final public String FEDORA_USERNAME = "-fedora_username"; 123 static final public String FEDORA_PASSWORD = "-fedora_password"; 124 static final public String FEDORA_PROTOCOL = "-fedora_protocol"; 116 125 static final public String GLI_ATTRIBUTE = "gli"; 117 126 static final public String GLISERVER_URL_ARGUMENT = "-gliserver_url"; … … 144 153 static final public String INT_STR = "int"; 145 154 static final public String IMPORT_STR = "import"; 146 static final public String[] KEEP_PLUG = { "G APlug", "METSPlug" };155 static final public String[] KEEP_PLUG = { "GreenstoneXMLPlugin", "GreenstoneMETSPlugin" }; 147 156 static final public String LANGUAGE_ARGUMENT = "l="; 148 157 static final public String LANGUAGE_ATTRIBUTE = "language"; … … 181 190 static final public String METADATA_TYPE_STR = "metadata"; 182 191 static final public String METADATA_XML = "metadata.xml"; 183 static final public String METADATAXMLPLUG_STR = "MetadataXMLPlug ";192 static final public String METADATAXMLPLUG_STR = "MetadataXMLPlugin"; 184 193 static final public String METADATUM_TYPE_STR = "metadatum"; 185 194 static final public String MGPP_ATTRIBUTE = "mgpp_enabled"; … … 207 216 static final public String PREDEFINED_METADATA_ATTRIBUTE = "predefined"; 208 217 static final public String RBRACKET_CHARACTER = "]"; 209 static final public String RECPLUG_STR = " RecPlug";218 static final public String RECPLUG_STR = "DirectoryPlugin"; 210 219 static final public String REGEXP_STR = "regexp"; 211 220 static final public String REPLACELISTREF_STR = "replaceListRef"; … … 216 225 static final public String SECTION_ELEMENT = "Section"; 217 226 static final public String SECTION_STR = "section"; 227 static final public String SEPARATE_CJK_OPTION_STR = "separate_cjk"; 218 228 static final public String SEPARATOR_ATTRIBUTE = "separator"; 219 229 static final public String SEPARATOR_CHARACTER = "/"; … … 246 256 static final public String TYPE_ATTRIBUTE = "type"; 247 257 static final public String UNKNOWN_ELEMENT = "Unknown"; 248 static final public String UNKNOWNPLUG_STR = "UnknownPlug ";258 static final public String UNKNOWNPLUG_STR = "UnknownPlugin"; 249 259 static final public String USE_METADATA_FILES_ARGUMENT = "use_metadata_files"; 250 260 static final public String USE_REMOTE_GREENSTONE_ARGUMENT = "-use_remote_greenstone"; -
gli/branches/rtl-gli/src/org/greenstone/gatherer/util/XMLTools.java
r14053 r18353 261 261 static final public String NOTWELLFORMED= "not well-formed"; 262 262 static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>"; 263 263 static final private String FOOTER = "</collectionConfig>"; 264 264 265 265 public static String parse (String xml_str) { … … 332 332 SAXParser parser = factory.newSAXParser (); 333 333 FileReader r = new FileReader(xml_file); 334 334 InputSource iSource = new InputSource(r); 335 335 XMLReader reader = parser.getXMLReader (); 336 336 reader.setContentHandler(new DefaultHandler()); … … 372 372 } // getLocationString(SAXParseException):String 373 373 374 374 375 /** Parse an XML document from a given file path */ 375 376 static public Document parseXMLFile (String xml_file_path, boolean use_class_loader) { … … 409 410 try { 410 411 InputStreamReader isr = new InputStreamReader (xml_input_stream, "UTF-8"); 411 Reader xml_reader = new BufferedReader (isr); 412 document = parseXML (xml_reader); 412 document = parseXML(isr); 413 413 isr.close (); 414 414 xml_input_stream.close (); … … 425 425 static public Document parseXML (Reader xml_reader) { 426 426 Document document = null; 427 428 try { 429 InputSource isc = new InputSource (xml_reader); 430 DOMParser parser = new DOMParser (); 431 parser.setFeature ("http://xml.org/sax/features/validation", false); 432 parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 433 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. 434 parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true); 435 parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false); 436 parser.parse (isc); 437 document = parser.getDocument (); 438 } 439 catch (SAXException exception) { 440 System.err.println ("SAX exception: " + exception.getMessage ()); 441 DebugStream.printStackTrace (exception); 427 428 // If debugging, the following will store the XML contents to be parsed, 429 // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on) 430 String xmlContents = ""; 431 432 try { 433 Reader reader = null; 434 435 // (1) By default, GLI will remove any contents preceeding (and invalidating) 436 // the XML and present these lines separately to the user 437 if(!DebugStream.isDebuggingEnabled()) { 438 try { 439 reader = new BufferedReader( new RemoveContentBeforeRootElementXMLReader(xml_reader) ); 440 } catch ( Exception e ) { 441 System.err.println( "Exception while wrapping the reader in parseXML(Reader)" ); 442 e.printStackTrace(); 443 } 444 } 445 446 // (2) If we are running GLI in debug mode: 447 // In case parsing exceptions are thrown (SAX Exceptions), we want to get some 448 // idea of where things went wrong. This will print the "XML" contents to either 449 // system.out (if debugging is off) or to the DebugStream otherwise. 450 // We need to read the XML twice to know the line where things went wrong, so 451 // do the additional reading only if we're debugging 452 else { 453 StringBuffer buf = new StringBuffer(); 454 char[] buffer = new char[500]; 455 int numCharsRead = xml_reader.read(buffer, 0, buffer.length); 456 while(numCharsRead != -1) { 457 buf.append(buffer, 0, numCharsRead); 458 numCharsRead = xml_reader.read(buffer, 0, buffer.length); 459 } 460 xmlContents = buf.toString(); 461 xml_reader.close(); // closing the old Reader 462 xml_reader = null; 463 buffer = null; 464 buf = null; 465 // we need a Reader to parse the same contents as the Reader that was just closed 466 reader = new BufferedReader(new StringReader(xmlContents)); 467 } 468 469 // (2) The actual XML parsing 470 InputSource isc = new InputSource (reader); 471 DOMParser parser = new DOMParser (); 472 parser.setFeature ("http://xml.org/sax/features/validation", false); 473 parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 474 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. 475 parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true); 476 parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false); 477 parser.parse (isc); 478 document = parser.getDocument (); 479 480 } catch(SAXParseException e) { 481 showXMLParseFailureLine(e, xmlContents); 482 } catch (SAXException exception) { 483 System.err.println ("SAX exception: " + exception.getMessage ()); 484 if(DebugStream.isDebuggingEnabled()) { 485 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" 486 + xmlContents + "\n************END\n"); 487 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 488 DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML..."); 489 System.exit(-1); 490 } 491 // else, not running in debug mode, so don't exit after exception 492 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed."); 493 DebugStream.printStackTrace (exception); 442 494 } 443 495 catch (Exception exception) { … … 447 499 return document; 448 500 } 449 501 502 /** Displays the line (string) where the SAXParseException occurred, given a String of the 503 * entire xml that was being parsed and the SAXParseException object that was caught. 504 * The messages are printed to DebugStream, so run GLI/FLI with -debug to view this output. 505 * @param xmlContents is the entire xml that was being parsed when the exception occurred 506 * @param e is the SAXParseException object that was thrown upon parsing the xmlContents. 507 */ 508 public static void showXMLParseFailureLine(SAXParseException e, String xmlContents) { 509 510 // There should be no characters at all that preceed the <?xml>... bit. 511 // The first check is for starting spaces: 512 if(xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t")) { 513 DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n"); 514 DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents); 515 return; // nothing more to do, first error identified 516 } 517 518 // the actual line (String literal) where parsing failed and the SAXParseException occurred. 519 String line = ""; 520 int linenumber = e.getLineNumber(); 521 DebugStream.println("\n****SAXParseException on LINE NUMBER: " + linenumber); 522 if(DebugStream.isDebuggingEnabled()) { 523 if(linenumber != -1) { 524 // find the line in xmlContents string (xmlContents is only set if GLI is run with debugging turned on) 525 int start = 0; 526 int end = xmlContents.length(); 527 for(int i = 1; i <= linenumber; i++) { 528 end = xmlContents.indexOf("\n"); 529 if(end > 0) { 530 line = xmlContents.substring(start, end); 531 } 532 start = end+1; 533 } 534 DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END"); 535 DebugStream.println("SAXParseException message: " + e.getMessage() + "\n"); 536 } else { // no particular line number, print out all the xml so debugger can inspect it 537 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" 538 + xmlContents + "\n************END\n"); 539 } 540 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 541 DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML..."); 542 System.exit(-1); 543 } else { // not running in debug mode 544 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed."); 545 } 546 } 547 450 548 451 549 static public StringBuffer readXMLStream (InputStream input_stream) { … … 540 638 f.setLineWidth (0); // Why isn't this working! 541 639 f.setPreserveSpace (false); 542 543 f.setNonEscapingElements (nonEscapingTagNames); 640 if (nonEscapingTagNames != null) { 641 f.setNonEscapingElements (nonEscapingTagNames); 642 } 544 643 // Create the necessary writer stream for serialization. 545 644 OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8"); … … 560 659 /** Write an XML document to a given file */ 561 660 static public void writeXMLFile (File xml_file, Document document) { 562 try { 563 OutputStream os = new FileOutputStream (xml_file); 564 // Create an output format for our document. 565 OutputFormat f = new OutputFormat (document); 566 f.setEncoding ("UTF-8"); 567 f.setIndenting (true); 568 f.setLineWidth (0); // Why isn't this working! 569 f.setPreserveSpace (false); 570 // Create the necessary writer stream for serialization. 571 OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8"); 572 Writer w = new BufferedWriter (osw); 573 // Generate a new serializer from the above. 574 XMLSerializer s = new XMLSerializer (w, f); 575 s.asDOMSerializer (); 576 // Finally serialize the document to file. 577 s.serialize (document); 578 // And close. 579 os.close (); 580 } 581 catch (Exception exception) { 582 DebugStream.printStackTrace (exception); 583 } 584 } 585 661 writeXMLFile(xml_file, document, null); 662 } 663 586 664 public static void printXMLNode (Node e) { 587 665 printXMLNode (e, 0) ; … … 686 764 687 765 } 766 767 public static String xmlNodeToStringWithoutIndenting (Node e) { 768 StringBuffer sb = new StringBuffer (""); 769 xmlNodeToStringWithoutNewline(sb, e, -1); 770 return sb.toString(); 771 } 688 772 public static String xmlNodeToStringWithoutNewline (Node e){ 689 773 StringBuffer sb = new StringBuffer (""); … … 696 780 for (int i=0 ; i<depth ; i++) 697 781 sb.append (' ') ; 698 782 699 783 if (e.getNodeType () == Node.TEXT_NODE){ 700 784 if (e.getNodeValue () != "") { … … 727 811 int len = children.getLength (); 728 812 for (int i = 0; i < len; i++) { 813 if (depth >= 0) { 729 814 xmlNodeToStringWithoutNewline (sb,children.item (i), depth + 1); 815 } else { 816 xmlNodeToStringWithoutNewline (sb,children.item (i), depth); 817 } 730 818 } 731 819 … … 735 823 sb.append ("</" + e.getNodeName () + ">"); 736 824 } 737 738 739 } 740 825 } 741 826 }
Note:
See TracChangeset
for help on using the changeset viewer.