Changeset 33043


Ignore:
Timestamp:
2019-04-30T18:33:42+12:00 (5 years ago)
Author:
ak19
Message:

Not a bugfix, but a change to help with encoding issues, including the current, still unresolved encoding issue. 1. Introduction of string2hex functions in JavaScript and Java. 2. Overloading GSXML.elementToString() with an additional debugEncoding parameter that turns on string2hex use when printing request and response XMLs. Non-basic ASCII characters in the XML will now be printed in hex if the debugEncoding parameter passed in is true. 3. The inactive Connector elements in server_tomcat8.xml.svn now also have the attribute URIEncoding set to UTF-8. Some of these inactive Connectors get turned on at times, such as for https, in which case we need Tomcat to also interpret GET/POST data coming in through those connectors as UTF-8.

Location:
main/trunk/greenstone3
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/resources/tomcat/server_tomcat8.xml.svn

    r32696 r33043  
    7070           port="@localhost.port.http@"  protocol="HTTP/1.1"
    7171               connectionTimeout="20000"
    72                redirectPort="@https.redirect.port@" />
     72               redirectPort="@https.redirect.port@"
     73               URIEncoding="UTF-8" />
    7374    -->
    7475    <!-- A "Connector" using the shared thread pool-->   
     
    9697    <!--
    9798    <Connector port="@https.redirect.port@" protocol="org.apache.coyote.http11.Http11NioProtocol"
    98                maxThreads="150" SSLEnabled="true">
     99               maxThreads="150" SSLEnabled="true" URIEncoding="UTF-8">
    99100        <SSLHostConfig>
    100101            <Certificate certificateKeystoreFile="conf/localhost-rsa.jks"
     
    111112    <!--
    112113    <Connector port="@https.redirect.port@" protocol="org.apache.coyote.http11.Http11AprProtocol"
    113                maxThreads="150" SSLEnabled="true" >
     114               maxThreads="150" SSLEnabled="true" URIEncoding="UTF-8" >
    114115        <UpgradeProtocol className="org.apache.coyote.http2.Http2Protocol" />
    115116        <SSLHostConfig>
     
    127128            keystorePass="@keystore.pass@"
    128129            clientAuth="false" sslProtocol="TLS"
    129         keystoreType="@keystore.type@" />
     130        keystoreType="@keystore.type@"
     131        URIEncoding="UTF-8" />
    130132    @https.comment.out.end@
    131133
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/util/GSXML.java

    r32942 r33043  
    4343
    4444import org.greenstone.gsdl3.util.MyNodeList;
     45import org.greenstone.util.Misc;
     46
    4547/** various functions for extracting info out of GS XML */
    4648public class GSXML
     
    16291631    }
    16301632
    1631     public static String elementToString(Element e, boolean indent)
     1633    // pass in debugEncoding=true to investigate encoding issues. This function will then return non-basic ASCII characters in hex
     1634    public static String elementToString(Element e, boolean indent, boolean debugEncoding)
    16321635    {
    16331636        String str = "";
     
    16471650            trans.transform(new DOMSource(e), new StreamResult(sw));
    16481651            str += sw.toString();
     1652           
     1653            // if debugging encoding issues, then encode unicode code pts as hex for all but non-alphanumeric and space/tab/newline chars
     1654            if(debugEncoding) str = Misc.stringToHex(str);
    16491655        }
    16501656        catch (Exception ex)
     
    16581664    }
    16591665
     1666    public static String elementToString(Element e, boolean indent)
     1667    {
     1668        return elementToString(e, indent, false);
     1669    }
     1670   
    16601671    public static ArrayList<String> getGroupsFromSecurityResponse(Element securityResponse)
    16611672    {
  • main/trunk/greenstone3/src/java/org/greenstone/util/Misc.java

    r32608 r33043  
    5656    }   
    5757    }
    58 
     58   
     59   
     60    // Debugging function to print a string's non-basic chars in hex
     61    // Based on https://stackoverflow.com/questions/923863/converting-a-string-to-hexadecimal-in-java
     62    public static String stringToHex(String str) {
     63      String result = "";
     64      for(int i = 0; i < str.length(); i++) {
     65            int charCode = str.codePointAt(i); // unicode codepoint / ASCII code
     66           
     67            // ASCII table: https://cdn.sparkfun.com/assets/home_page_posts/2/1/2/1/ascii_table_black.png
     68            // If the unicode character code pt is less than the ASCII code for space and greater than for tilda, let's display the char in hex (x0000 format)
     69            if((charCode >= 20 && charCode <= 126) || charCode == 9 || charCode == 10 || charCode == 13) { // space to tilda, TAB, LF, CR are printable
     70                result += str.charAt(i);
     71            } else {
     72                result += "x" + String.format("%04x", charCode);
     73            }
     74      }
     75     
     76      return result;
     77    }
     78   
     79   
    5980    public static void printHash(HashMap map) {
    6081    Set entries = map.entrySet();
  • main/trunk/greenstone3/web/interfaces/default/js/javascript-global-functions.js

    r32893 r33043  
    1111{
    1212    return $("#" + id.replace(/\./g, "\\.").replace(/:/g,"\\:"));
     13}
     14
     15// Debugging function to print a string's non-basic chars in hex
     16// Based on https://stackoverflow.com/questions/36637146/javascript-encode-string-to-hex/36637293
     17// https://stackoverflow.com/questions/21647928/javascript-unicode-string-to-hex
     18gs.functions.string2hex = function(str) {
     19    var hex, i;
     20
     21    var result = "";
     22    for (i=0; i<str.length; i++) {
     23        charcode = str.charCodeAt(i);
     24        // ASCII table: https://cdn.sparkfun.com/assets/home_page_posts/2/1/2/1/ascii_table_black.png
     25        // if the unicode character code pt is less than the ASCII code for space and greater than for tilda, let's display the char in hex (x0000 format)
     26        if(charcode < 20 || charcode > 126) { //doesn't work: if(str.charAt(i) < ' ' || str.charAt(i) > '~') {
     27            hex = charcode.toString(16);
     28            result += "x" + ("000"+hex).slice(-4);
     29        }
     30        else {
     31            result += str.charAt(i);
     32        }
     33    }
     34
     35    return result;
    1336}
    1437
Note: See TracChangeset for help on using the changeset viewer.