Changeset 32775

Show
Ignore:
Timestamp:
13.02.2019 20:23:04 (2 months ago)
Author:
ak19
Message:

Implementing further improvements suggested by Dr Bainbridge. Partly returning to the changes committed in rev 32773, as they were relevant to some of the suggested solutions. Changes in this commit are: 1. Better function names. 2. Tilde (~) is now also treated as an unsafe character, by following the unsafe list at https://perishablepress.com/stop-using-unsafe-characters-in-urls/ literally. Encoding safe characters doesn't matter anyway, since everything comes out URL decoded on the other end. 3. More comments to relate and differentiate the existing JavaScript functions encodeURI() and encodeURIComponent() to our conceptually similar makeURLSafe() and makeURLComponentSafe(), which are different implementation-wise as they follow the partially overlapping but distinct sets of unsafe and reserved chars listed at https://perishablepress.com/stop-using-unsafe-characters-in-urls/. 4. Replacement of each such character by its URL encoded variant is now improved by basing the replace() calls on the previous commit (r32773), which called a callback function. The callback function now calls the new urlEncodeChar() function, which converts a char to its ord and then to hex (with % prefix), as described by Dr Bainbridge.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/web/interfaces/default/js/document_scripts.js

    r32774 r32775  
    2424* EXPANSION SCRIPTS * 
    2525********************/ 
     26 
     27/*  
     28   Given a string consisting of a single character, returns its URL-encoded form: '%' followed by the character's code in uppercase hex (%XX), e.g. ';' becomes %3B 
     29   https://www.w3resource.com/javascript-exercises/javascript-string-exercise-27.php 
     30   https://stackoverflow.com/questions/40100096/what-is-equivalent-php-chr-and-ord-functions-in-javascript 
     31   https://www.w3resource.com/javascript-exercises/javascript-string-exercise-27.php 
     32*/ 
     33function urlEncodeChar(single_char_string) { 
     34    /*var hex = Number(single_char_string.charCodeAt(0)).toString(16); 
     35    var str = "" + hex; 
     36    str = "%" + str.toUpperCase(); 
     37    return str; 
     38    */ 
     39 
     40    var hex = "%" + Number(single_char_string.charCodeAt(0)).toString(16).toUpperCase(); 
     41    return hex; 
     42} 
    2643 
    2744/* 
     
    4057  Possibly more info: https://stackoverflow.com/questions/1547899/which-characters-make-a-url-invalid 
    4158 
     59  Javascript already provides functions encodeURI() and encodeURIComponent(), see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI 
     60  However, the set of chars they deal with only partially overlap with the set of chars that need encoding as per the RFC3986 for URIs and RFC1738 for URLs discussed at 
     61  https://perishablepress.com/stop-using-unsafe-characters-in-urls/ 
     62  We want to handle all the characters listed as unsafe and reserved at https://perishablepress.com/stop-using-unsafe-characters-in-urls/ 
     63  so we define and use our own conceptually equivalent methods for both existing JavaScript methods:  
     64  - makeURLSafe() for JavaScript's encodeURI() to make sure all unsafe characters in URLs are escaped by being URL encoded 
     65  - and makeURLComponentSafe() for JavaScript's encodeURIComponent() to additionally make sure all reserved characters in a URL portion are escaped by being URL encoded too 
     66 
     67  Function makeURLSafe() is passed a string that represents a URL and therefore only deals with characters that are unsafe in a URL and which therefore require escaping.  
     68  Function makeURLComponentSafe() deals with portions of a URL that when decoded need not represent a URL at all, for example data like inline templates passed in as a 
     69  URL query string's parameter values. As such makeURLComponentSafe() should escape both unsafe URL characters and characters that are reserved in URLs since reserved 
     70  characters in the query string part (as query param values representing data) may take on a different meaning from their reserved meaning in a URL context. 
    4271*/ 
    43 /* URL encode RESERVED characters in a non-URL context of a URL, such as the inline template (ilt) parameter value of a URL */ 
    44 function makeSafeForURL(url_part, encode_percentages) { 
     72 
     73/* URL encodes both  
     74   - UNSAFE characters to make URL safe, by calling makeURLSafe() 
     75   - and RESERVED characters (characters that have reserved meanings within a URL) to make URL valid, since the url component parameter could use reserved characters 
     76   in a non-URL sense. For example, the inline template (ilt) parameter value of a URL could use '=' and '&' signs where these would have XSLT rather than URL meanings. 
     77   
     78   See end of https://www.w3schools.com/jsref/jsref_replace.asp to use a callback passing each captured element of a regex in str.replace() 
     79*/ 
     80function makeURLComponentSafe(url_part, encode_percentages) { 
    4581    // https://stackoverflow.com/questions/12797118/how-can-i-declare-optional-function-parameters-in-javascript 
    4682    encode_percentages = encode_percentages || 1; // this method forces the URL-encoding of any % in url_part, e.g. do this for inline-templates that haven't ever been encoded 
    4783     
    4884    var url_encoded = makeURLSafe(url_part, encode_percentages); 
    49     url_encoded = url_encoded.replace(/;/g, "%3B").replace(/\//g, "%2F").replace(/\?/g, "%3F").replace(/\:/g, "%3A").replace(/\@/g, "%40").replace(/=/g, "%3D").replace(/\&/g,"%26"); 
     85    //return url_encoded.replace(/;/g, "%3B").replace(/\//g, "%2F").replace(/\?/g, "%3F").replace(/\:/g, "%3A").replace(/\@/g, "%40").replace(/=/g, "%3D").replace(/\&/g,"%26"); 
     86    url_encoded = url_encoded.replace(/[\;\/\?\:\@\=\&]/g, function(s) {  
     87    return urlEncodeChar(s); 
     88    });  
    5089    return url_encoded; 
    5190} 
    5291 
    5392/*  
    54    URL encode UNSAFE characters to make URL valid  
    55    Set encode_percentages to 1 (true) if the url isn't already partly URL encoded 
     93   URL encode UNSAFE characters to make URL passed in safe. 
     94   Set encode_percentages to 1 (true) if you want % signs encoded as well; leave it unset (or falsy) if the url is already partly URL encoded, so that the % of existing escapes is not encoded again. 
    5695*/ 
    5796function makeURLSafe(url, encode_percentages) {     
     
    6099    var url_encoded = url; 
    61100    if(encode_percentages) { url_encoded = url_encoded.replace(/\%/g,"%25"); } // encode % first 
    62     url_encoded = url_encoded.replace(/ /g, "%20").replace(/\"/g,"%22").replace(/\</g,"%3C").replace(/\>/g,"%3E").replace(/\#/g,"%23").replace(/\{/g,"%7B").replace(/\}/g,"%7D"); 
    63     url_encoded = url_encoded.replace(/\|/g,"%7C").replace(/\\/g,"%5C").replace(/\^/g,"%5E").replace(/\[/g,"%5B").replace(/\]/g,"%5D").replace(/\`/g,"%60"); 
     101    //url_encoded = url_encoded.replace(/ /g, "%20").replace(/\"/g,"%22").replace(/\</g,"%3C").replace(/\>/g,"%3E").replace(/\#/g,"%23").replace(/\{/g,"%7B").replace(/\}/g,"%7D"); 
     102    //url_encoded = url_encoded.replace(/\|/g,"%7C").replace(/\\/g,"%5C").replace(/\^/g,"%5E").replace(/\[/g,"%5B").replace(/\]/g,"%5D").replace(/\`/g,"%60"); 
    64103    // ~ is handled below as well: urlEncodeChar() encodes it to %7E, even though https://meyerweb.com/eric/tools/dencoder/ URL-encodes ~ to ~ (i.e. leaves it as is). 
     104    //return url_encoded;     
     105    url_encoded = url_encoded.replace(/[\ \"\<\>\#\{\}\|\\^\~\[\]\`]/g, function(s) {  
     106    return urlEncodeChar(s); 
     107    }); 
    65108    return url_encoded; 
    66109} 
     
    82125    template += '</xsl:template>'; 
    83126     
    84     template = makeSafeForURL(template); 
     127    template = makeURLComponentSafe(template); 
    85128     
    86129    var hlCheckBox = document.getElementById("highlightOption"); 
     
    152195    template += '</xsl:template>'; 
    153196 
    154     template = makeSafeForURL(template); 
     197    template = makeURLComponentSafe(template); 
    155198    var url = gs.xsltParams.library_name + "/collection/" + gs.cgiParams.c + "/document/" + sectionID + "?ilt=" + template; 
    156199 
     
    721764    ilt += '</xsl:template>'; 
    722765     
    723     ilt = makeSafeForURL(ilt); 
     766    ilt = makeURLComponentSafe(ilt); 
    724767 
    725768 
     
    9861029        template +=   '</html>'; 
    9871030        template += '</xsl:template>'; 
    988     template = makeSafeForURL(template); 
     1031    template = makeURLComponentSafe(template); 
    9891032        var url = href + "?noText=1&ilt=" + template; 
    9901033 
     
    13901433    template +=   ']</images>'; 
    13911434    template += '</xsl:template>'; 
    1392     template = makeSafeForURL(template); 
     1435    template = makeURLComponentSafe(template); 
    13931436    var url = gs.xsltParams.library_name + "/collection/" + gs.cgiParams.c + "/document/" + gs.cgiParams.d + "?ed=1&ilt=" + template; 
    13941437