Changeset 33016 for main


Ignore:
Timestamp:
2019-04-17T20:26:03+12:00 (5 years ago)
Author:
ak19
Message:

Getting facet searching working again in tomcat 8, where it had broken due to reserved and unsafe chars in the URL. The fix required using the recent makeURLSafe() and makeURLComponentSafe() methods in facet-scripts.js. This meant that these functions and their helper functions needed to be moved into their own script file, the newly introduced utility_scripts.js, rather than remain in document_scripts.js, since document_scripts.js is not included on the query.xsl page, which includes facet-scripts.js.

Location:
main/trunk/greenstone3/web/interfaces/default
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/web/interfaces/default/js/document_scripts.js

    r32775 r33016  
    2525********************/
    2626
    /*
       Given a string consisting of a single character, returns its URL
       (percent) encoding.
       - ASCII characters become %XX, always with two upper-case hex digits
         (so TAB is %09, not %9).
       - Non-ASCII characters become the %XX sequence of their UTF-8 bytes,
         as produced by encodeURIComponent() (so the euro sign is %E2%82%AC,
         not the invalid %20AC). Like encodeURIComponent(), this throws a
         URIError for a lone surrogate.
       - An empty string is returned unchanged (as an empty string).

       https://www.w3resource.com/javascript-exercises/javascript-string-exercise-27.php
       https://stackoverflow.com/questions/40100096/what-is-equivalent-php-chr-and-ord-functions-in-javascript

       @param {string} single_char_string - the character to encode
       @returns {string} the percent-encoded form of that character
    */
    function urlEncodeChar(single_char_string) {
        var code = single_char_string.charCodeAt(0);

        if (code < 0x80) {
            var hex = code.toString(16).toUpperCase();
            // a percent escape must always have exactly two hex digits
            return "%" + (hex.length < 2 ? "0" + hex : hex);
        }

        // Non-ASCII (or empty input, where code is NaN): let the built-in
        // produce the UTF-8 based escape sequence.
        return encodeURIComponent(single_char_string);
    }
    43 
    44 /*
    45   Tomcat 8 appears to be stricter in requiring unsafe and reserved chars
    46   in URLs to be escaped with URL encoding
    47   See section "Character Encoding Chart" of
    48   https://perishablepress.com/stop-using-unsafe-characters-in-urls/
    49   Reserved chars:
    50      ; / ? : @ = &
    51      ----->  %3B %2F %3F %3A %40 %3D %26
    52   Unsafe chars:
    53      " < > # % { } | \ ^ ~ [ ] ` and SPACE/BLANK
    54      ----> %22 %3C %3E %23 %25 %7B %7D %7C %5C %5E ~ %5B %5D %60 and %20
    55   But the above conflicts with the reserved vs unreserved listings at
    56      https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
    57   Possibly more info: https://stackoverflow.com/questions/1547899/which-characters-make-a-url-invalid
    58 
    59   Javascript already provides functions encodeURI() and encodeURIComponent(), see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
    60   However, the set of chars they deal with only partially overlaps with the set of chars that need encoding as per RFC3986 for URIs and RFC1738 for URLs, discussed at
    61   https://perishablepress.com/stop-using-unsafe-characters-in-urls/
    62   We want to handle all the characters listed as unsafe and reserved at https://perishablepress.com/stop-using-unsafe-characters-in-urls/
    63   so we define and use our own conceptually equivalent methods for both existing JavaScript methods:
    64   - makeURLSafe() for JavaScript's encodeURI() to make sure all unsafe characters in URLs are escaped by being URL encoded
    65   - and makeURLComponentSafe() for JavaScript's encodeURIComponent() to additionally make sure all reserved characters in a URL portion are escaped by being URL encoded too
    66 
    67   Function makeURLSafe() is passed a string that represents a URL and therefore only deals with characters that are unsafe in a URL and which therefore require escaping.
    68   Function makeURLComponentSafe() deals with portions of a URL that when decoded need not represent a URL at all, for example data like inline templates passed in as a
    69   URL query string's parameter values. As such makeURLComponentSafe() should escape both unsafe URL characters and characters that are reserved in URLs, since reserved
    70   characters in the query string part (as query param values representing data) may take on a different meaning from their reserved meaning in a URL context.
    71 */
    72 
    /* URL encodes both
       - UNSAFE characters to make the URL part safe, by calling makeURLSafe()
       - and RESERVED characters ( ; / ? : @ = & ), which have reserved meanings
         within a URL, since the url component parameter could use them in a
         non-URL sense. For example, the inline template (ilt) parameter value of
         a URL could use '=' and '&' signs where these would have XSLT rather
         than URL meanings.

       @param {string} url_part - the portion of a URL (e.g. a query parameter value) to encode
       @param {number|boolean} [encode_percentages=1] - when truthy, any % in url_part is
              encoded to %25 first, e.g. for inline templates that have never been encoded.
              Pass 0/false for input that already contains %XX escapes, to avoid double-encoding.
       @returns {string} url_part with unsafe and reserved characters percent-encoded
    */
    function makeURLComponentSafe(url_part, encode_percentages) {
        // Apply the default only when the argument was omitted. The previous
        // "encode_percentages || 1" also overrode an explicit 0/false, which made
        // it impossible for a caller to switch percent-encoding off.
        if (encode_percentages === undefined || encode_percentages === null) {
            encode_percentages = 1;
        }

        var url_encoded = makeURLSafe(url_part, encode_percentages);

        // str.replace() with a callback: each matched reserved character is
        // handed to urlEncodeChar() and replaced by its %XX form
        return url_encoded.replace(/[;\/?:@=&]/g, function(s) {
            return urlEncodeChar(s);
        });
    }
    91 
    /*
       URL encode UNSAFE characters to make the URL passed in safe.
       The characters encoded are: SPACE " < > # { } | \ ^ ~ [ ] `
       (note that ~ is encoded too, to %7E).

       Set encode_percentages to 1 (true) if you DO want % signs encoded (to %25) as well.
       Leave it off (the default, 0) if the url is already partly URL encoded, so that
       its existing %XX escapes are not encoded a second time.

       @param {string} url - the URL to make safe
       @param {number|boolean} [encode_percentages=0] - when truthy, % is encoded too
       @returns {string} the URL with all unsafe characters percent-encoded
    */
    function makeURLSafe(url, encode_percentages) {
        var url_encoded = url;

        // % must be encoded first: doing it after the other replacements would
        // re-encode the % of every %XX escape just produced
        if (encode_percentages) { url_encoded = url_encoded.replace(/%/g, "%25"); }

        return url_encoded.replace(/[ "<>#{}|\\^~\[\]`]/g, function(s) {
            return urlEncodeChar(s);
        });
    }
    11027
    11128function getTextForSection(sectionID, callback)
  • main/trunk/greenstone3/web/interfaces/default/js/facet-scripts.js

    r32110 r33016  
    2121    }
    2222   
    23     var countsString = "s1.facetQueries=&";
     23    var countsString = "s1.facetQueries=";
    2424    if(counts.length > 0)
    2525    {
    26         countsString = "s1.facetQueries=[";
    27         var countsStringBuffer = "";
     26        var countsStringBuffer = "[";
    2827        for(var i = 0; i < counts.length; i++)
    2928        {
    3029            // escape any apostrophes in facet query terms
    3130            // (ext/solr's Greenstone3SearchHandler does the other half of handling them)
    32             countsStringBuffer += "\"" + encodeURI(counts[i]).replace(/'/g, "%2527") + "\"";
     31            //countsStringBuffer += "\"" + encodeURI(counts[i]).replace(/'/g, "%2527") + "\"";
     32            // calling makeURLSafe() here will ensure percent signs are escaped away too
     33            // by the end of makeURLComponentSafe() call below
     34            countsStringBuffer += "\"" + makeURLSafe(counts[i]).replace(/'/g, "%2527") + "\"";
    3335            if(i < counts.length - 1)
    3436            {
     
    3739        }
    3840       
    39         countsString += encodeURI(countsStringBuffer) + "]&";
     41        countsStringBuffer += "]";
     42
     43        // We need to ensure that the *value* of s1.facetQueries (so everything after
     44        // s1.facetQueries= and before the connecting &) are safe, which requires escaping,
     45        // and are further also escaped to not be mistaken for their reserved meaning.
     46        // : is a reserved character in URLs, [] are unsafe characters. All need escaping.
     47        // So call makeURLComponentSafe(), not makeURLSafe()
     48        countsString = countsString + makeURLComponentSafe(countsStringBuffer, 1);
    4049    }
    4150   
    42     console.log("STRING IS " + countsString)
     51    countsString += "&";
     52    console.log("STRING IS " + countsString);
    4353   
    4454    $.ajax(gs.xsltParams.library_name + "/collection/" + gs.cgiParams.c + "/search/" + gs.cgiParams.s + "?" + searchString + countsString + "excerptid=resultsArea")
  • main/trunk/greenstone3/web/interfaces/default/transform/pages/document.xsl

    r32836 r33016  
    300300
    301301    <xsl:template name="javascriptForDocumentView">
     302      <script type="text/javascript" src="interfaces/{$interface_name}/js/utility_scripts.js"><xsl:text> </xsl:text></script>
    302303        <script type="text/javascript" src="interfaces/{$interface_name}/js/document_scripts.js"><xsl:text> </xsl:text></script>
    303304      <gsf:metadata name="Thumb" hidden="true"/>
  • main/trunk/greenstone3/web/interfaces/default/transform/pages/query.xsl

    r32719 r33016  
    114114
    115115    <xsl:template name="displayFacets">
     116            <script type="text/javascript" src="interfaces/{$interface_name}/js/utility_scripts.js"><xsl:text> </xsl:text></script>
    116117            <script type="text/javascript" src="interfaces/{$interface_name}/js/facet-scripts.js">
    117118                <xsl:text> </xsl:text>
Note: See TracChangeset for help on using the changeset viewer.