Ignore:
Timestamp:
2019-02-13T20:23:04+13:00 (5 years ago)
Author:
ak19
Message:

Implementing further improvements, suggested by Dr Bainbridge. Partly returning to the changes committed in rev 32773 as they were relevant to some of the suggested solutions. Changes in this commit are 1. Better function names. 2. Tilde is now also treated as an unsafe character by following the unsafe list at https://perishablepress.com/stop-using-unsafe-characters-in-urls/ literally. Encoding safe characters doesn't matter anyway, since everything comes out url decoded on the other end. 3. More comments to relate and differentiate the existing JavaScript functions encodeURI() and encodeURIComponent() to our conceptually similar makeURLSafe() and makeURLComponentSafe() that are different implementation-wise as they follow the partially overlapping but distinct set of unsafe and reserved chars listed at https://perishablepress.com/stop-using-unsafe-characters-in-urls/. 4. Replacement of each such character by its URL encoded variant is now improved by basing the replace() calls on the previous commit (r 32773) which called a callback function. The callback function now calls the new urlEncodeChar() function which converts a char to its ord then hex (with % prefix), as described by Dr Bainbridge.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/web/interfaces/default/js/document_scripts.js

    r32774 r32775  
    2424* EXPANSION SCRIPTS *
    2525********************/
     26
     27/*
     28   Given a string consisting of a single character, returns the %hex (%XX)
     29   https://www.w3resource.com/javascript-exercises/javascript-string-exercise-27.php
     30   https://stackoverflow.com/questions/40100096/what-is-equivalent-php-chr-and-ord-functions-in-javascript
     31   https://www.w3resource.com/javascript-exercises/javascript-string-exercise-27.php
     32*/
     33function urlEncodeChar(single_char_string) {
     34    /*var hex = Number(single_char_string.charCodeAt(0)).toString(16);
     35    var str = "" + hex;
     36    str = "%" + str.toUpperCase();
     37    return str;
     38    */
     39
     40    var hex = "%" + Number(single_char_string.charCodeAt(0)).toString(16).toUpperCase();
     41    return hex;
     42}
    2643
    2744/*
     
    4057  Possibly more info: https://stackoverflow.com/questions/1547899/which-characters-make-a-url-invalid
    4158
     59  Javascript already provides functions encodeURI() and encodeURIComponent(), see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
     60  However, the set of chars they deal with only partially overlap with the set of chars that need encoding as per the RFC3986 for URIs and RFC1738 for URLs discussed at
     61  https://perishablepress.com/stop-using-unsafe-characters-in-urls/
     62  We want to handle all the characters listed as unsafe and reserved at https://perishablepress.com/stop-using-unsafe-characters-in-urls/
     63  so we define and use our own conceptually equivalent methods for both existing JavaScript methods:
     64  - makeURLSafe() for JavaScript's encodeURI() to make sure all unsafe characters in URLs are escaped by being URL encoded
     65  - and makeURLComponentSafe() for JavaScript's encodeURIComponent() to additionally make sure all reserved characters in a URL portion are escaped by being URL encoded too
     66
     67  Function makeURLSafe() is passed a string that represents a URL and therefore only deals with characters that are unsafe in a URL and which therefore require escaping.
     68  Function makeURLComponentSafe() deals with portions of a URL that when decoded need not represent a URL at all, for example data like inline templates passed in as a
     69  URL query string's parameter values. As such makeURLComponentSafe() should escape both unsafe URL characters and characters that are reserved in URLs since reserved
     70  characters in the query string part (as query param values representing data) may take on a different meaning from their reserved meaning in a URL context.
    4271*/
    43 /* URL encode RESERVED characters in a non-URL context of a URL, such as the inline template (ilt) parameter value of a URL */
    44 function makeSafeForURL(url_part, encode_percentages) {
     72
     73/* URL encodes both
     74   - UNSAFE characters to make URL safe, by calling makeSafeURL()
     75   - and RESERVED characters (characters that have reserved meanings within a URL) to make URL valid, since the url component parameter could use reserved characters
     76   in a non-URL sense. For example, the inline template (ilt) parameter value of a URL could use '=' and '&' signs where these would have XSLT rather than URL meanings.
     77 
     78   See end of https://www.w3schools.com/jsref/jsref_replace.asp to use a callback passing each captured element of a regex in str.replace()
     79*/
     80function makeURLComponentSafe(url_part, encode_percentages) {
    4581    // https://stackoverflow.com/questions/12797118/how-can-i-declare-optional-function-parameters-in-javascript
    4682    encode_percentages = encode_percentages || 1; // this method forces the URL-encoding of any % in url_part, e.g. do this for inline-templates that haven't ever been encoded
    4783   
    4884    var url_encoded = makeURLSafe(url_part, encode_percentages);
    49     url_encoded = url_encoded.replace(/;/g, "%3B").replace(/\//g, "%2F").replace(/\?/g, "%3F").replace(/\:/g, "%3A").replace(/\@/g, "%40").replace(/=/g, "%3D").replace(/\&/g,"%26");
     85    //return url_encoded.replace(/;/g, "%3B").replace(/\//g, "%2F").replace(/\?/g, "%3F").replace(/\:/g, "%3A").replace(/\@/g, "%40").replace(/=/g, "%3D").replace(/\&/g,"%26");
     86    url_encoded = url_encoded.replace(/[\;\/\?\:\@\=\&]/g, function(s) {
     87    return urlEncodeChar(s);
     88    });
    5089    return url_encoded;
    5190}
    5291
    5392/*
    54    URL encode UNSAFE characters to make URL valid
    55    Set encode_percentages to 1 (true) if the url isn't already partly URL encoded
     93   URL encode UNSAFE characters to make URL passed in safe.
     94   Set encode_percentages to 1 (true) if you want % signs encoded as well: you'd do so if the url is NOT already partly URL encoded. Leave it unset/false to keep existing % signs as they are.
    5695*/
    5796function makeURLSafe(url, encode_percentages) {   
     
    6099    var url_encoded = url;
    61100    if(encode_percentages) { url_encoded = url_encoded.replace(/\%/g,"%25"); } // encode % first
    62     url_encoded = url_encoded.replace(/ /g, "%20").replace(/\"/g,"%22").replace(/\</g,"%3C").replace(/\>/g,"%3E").replace(/\#/g,"%23").replace(/\{/g,"%7B").replace(/\}/g,"%7D");
    63     url_encoded = url_encoded.replace(/\|/g,"%7C").replace(/\\/g,"%5C").replace(/\^/g,"%5E").replace(/\[/g,"%5B").replace(/\]/g,"%5D").replace(/\`/g,"%60");
     101    //url_encoded = url_encoded.replace(/ /g, "%20").replace(/\"/g,"%22").replace(/\</g,"%3C").replace(/\>/g,"%3E").replace(/\#/g,"%23").replace(/\{/g,"%7B").replace(/\}/g,"%7D");
     102    //url_encoded = url_encoded.replace(/\|/g,"%7C").replace(/\\/g,"%5C").replace(/\^/g,"%5E").replace(/\[/g,"%5B").replace(/\]/g,"%5D").replace(/\`/g,"%60");
    64103    // Should we handle ~, but then what is its URL encoded value? Because https://meyerweb.com/eric/tools/dencoder/ URLencodes ~ to ~.
     104    //return url_encoded;   
     105    url_encoded = url_encoded.replace(/[\ \"\<\>\#\{\}\|\\^\~\[\]\`]/g, function(s) {
     106    return urlEncodeChar(s);
     107    });
    65108    return url_encoded;
    66109}
     
    82125    template += '</xsl:template>';
    83126   
    84     template = makeSafeForURL(template);
     127    template = makeURLComponentSafe(template);
    85128   
    86129    var hlCheckBox = document.getElementById("highlightOption");
     
    152195    template += '</xsl:template>';
    153196
    154     template = makeSafeForURL(template);
     197    template = makeURLComponentSafe(template);
    155198    var url = gs.xsltParams.library_name + "/collection/" + gs.cgiParams.c + "/document/" + sectionID + "?ilt=" + template;
    156199
     
    721764    ilt += '</xsl:template>';
    722765   
    723     ilt = makeSafeForURL(ilt);
     766    ilt = makeURLComponentSafe(ilt);
    724767
    725768
     
    9861029        template +=   '</html>';
    9871030        template += '</xsl:template>';
    988     template = makeSafeForURL(template);
     1031    template = makeURLComponentSafe(template);
    9891032        var url = href + "?noText=1&ilt=" + template;
    9901033
     
    13901433    template +=   ']</images>';
    13911434    template += '</xsl:template>';
    1392     template = makeSafeForURL(template);
     1435    template = makeURLComponentSafe(template);
    13931436    var url = gs.xsltParams.library_name + "/collection/" + gs.cgiParams.c + "/document/" + gs.cgiParams.d + "?ed=1&ilt=" + template;
    13941437
Note: See TracChangeset for help on using the changeset viewer.