Changeset 34100


Ignore:
Timestamp:
2020-04-09T11:39:25+12:00 (18 months ago)
Author:
ak19
Message:

Related to Trac changesets 33793, 34098 and 34099, which fixed encoding-related parsing issues on different (Chinese) locales. Those commits were necessary to support different locales, but also had the side effect of apparently fixing the issue of UTF-8 characters such as a-macron not displaying correctly in the running GS3 server. However, a fix was still needed to resolve the same issue when GS3 is launched in other ways (not necessarily through our own Tomcat). This commit carries out the idea Dr Bainbridge described in the email thread with subject 'GS3 encoding problem reproduces immediately', in his email response dated Apr 8, 10:50 AM.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/util/XMLConverter.java

    r33897 r34100  
    7272    static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.XMLConverter.class.getName());
    7373
     74   
     75    public static final String XML_FILE_ENCODING="UTF-8";
     76
     77    static {
     78    String fileEncodingProperty = System.getProperty("file.encoding");
     79
     80    // log a warning if the file.encoding Java property departs from XML_FILE_ENCODING,
     81    // but always use the latter for reading in XML files in getDOM(File, ...) methods
     82    if(fileEncodingProperty == null) {
     83        logger.warn("file.encoding Java property was not set.");
     84        System.err.println("Using XMLConverter.XML_FILE_ENCODING="+XML_FILE+ENCODING+"in XMLConverter.getDOM() calls.");
     85    }
     86    else if(!fileEncodingProperty.equals(XML_FILE_ENCODING)) {
     87        logger.warn("file.encoding property passed in with JAVA_TOOLS_OPTIONS: "
     88                   + fileEncodingProperty);
     89        logger.warn("does not match XMLConverter.XML_FILE_ENCODING: " + XML_FILE_ENCODING);
     90        logger.warn("Using XMLConverter.XML_FILE_ENCODING in XMLConverter.getDOM() calls.");
     91    }
     92    };
     93   
     94
    7495    /** the no-args constructor */
    7596    public XMLConverter()
     
    7899    }
    79100
    80     /** returns a DOM Document */
     101    /** returns a DOM Document
     102     * Question: why is this not simply calling the getDOM(String in, String encoding) version?
     103     * Answer: Because the string, being a Java String, is already fully formed in unicode,
     104     * so it doesn't need to call that other version.
     105     */
    81106  public static Document getDOM(String in)
    82107    {
     
    123148    /** returns a DOM Document */
    124149  public static Document getDOM(File in) {
     150
     151      // now we're always going to explicitly use XML_FILE_ENCODING (UTF-8)
     152      // as the encoding to read in a file unless otherwise specified.
     153      return getDOM(in, XML_FILE_ENCODING, null);
     154     
     155      /*
    125156        try
    126157        {
     
    140171        }
    141172        return null;
     173      */
    142174    }
    143175
     
    169201
    170202  public static Document getDOM(File in, EntityResolver er) {
    171    
     203      // now we're always going to explicitly use XML_FILE_ENCODING (UTF-8)
     204      // as the encoding to read in a file unless otherwise specified.
     205      return getDOM(in, XML_FILE_ENCODING, er);
     206      /*
    172207    try {     
    173208      InputSource xml_source = new InputSource(new FileInputStream(in));
     
    182217      }
    183218    return null;
     219      */
    184220  }
    185221
Note: See TracChangeset for help on using the changeset viewer.