source: main/trunk/greenstone3/web/interfaces/default/js/utility_scripts.js@ 33016

Last change on this file since 33016 was 33016, checked in by ak19, 5 years ago

Getting facet searching working again in tomcat 8, where it had broken due to reserved and unsafe chars in the URL. The fix required using the recent makeURLSafe() and makeURLComponentSafe() methods in facet-scripts.js. This meant that these functions and their helper functions needed to be moved into their own script file, newly introducing utility_scripts.js, rather than remain in document_scripts.js since doc_scripts.js is not included on the query.xsl page which includes facet-scripts.js

File size: 5.5 KB
Line 
1/** JavaScript file of utility functions.
2 * At present contains functions for sanitising of URLs,
3 * since tomcat 8+, being more compliant with URL/URI standards, is more strict about URLs.
4 */
5
/**
 * Percent-encodes the first character of a string for use in a URL.
 *
 * ASCII characters become one zero-padded, upper-case escape (%XX), so a tab
 * is "%09" rather than the malformed "%9". Non-ASCII characters become the
 * escapes of their UTF-8 bytes (e.g. the euro sign is "%E2%82%AC"), rather
 * than a raw UTF-16 code unit such as "%20AC", which is not a valid escape.
 *
 * Background reading:
 *   https://www.w3resource.com/javascript-exercises/javascript-string-exercise-27.php
 *   https://stackoverflow.com/questions/40100096/what-is-equivalent-php-chr-and-ord-functions-in-javascript
 *
 * @param {string} single_char_string - the character to encode. Only the first
 *        character (one code unit, or a full surrogate pair) is used; anything
 *        after it is ignored.
 * @returns {string} the percent-encoded character, or "" when the input is
 *          empty, null or undefined.
 */
function urlEncodeChar(single_char_string) {
    if (single_char_string === undefined || single_char_string === null || single_char_string === "") {
        return "";
    }

    var code = single_char_string.charCodeAt(0);

    // ASCII: a single byte, always written as exactly two hex digits.
    if (code < 0x80) {
        var hex = code.toString(16).toUpperCase();
        return "%" + (hex.length < 2 ? "0" : "") + hex;
    }

    // Non-ASCII: pick up the whole character, including the trailing half of a
    // surrogate pair, so that characters outside the BMP are encoded as one unit.
    var character = single_char_string.charAt(0);
    if (code >= 0xD800 && code <= 0xDBFF) {
        var next_code = single_char_string.charCodeAt(1);
        if (next_code >= 0xDC00 && next_code <= 0xDFFF) {
            character = single_char_string.substring(0, 2);
        }
    }

    try {
        return encodeURIComponent(character); // emits the UTF-8 bytes as %XX escapes
    } catch (e) {
        // encodeURIComponent() throws URIError on an unpaired surrogate, which has
        // no UTF-8 form. Fall back to U+FFFD (REPLACEMENT CHARACTER) so that this
        // function never throws while running inside a replace() callback.
        return "%EF%BF%BD";
    }
}
22
23/*
24 Tomcat 8 appears to be stricter in requiring unsafe and reserved chars
25 in URLs to be escaped with URL encoding
26 See section "Character Encoding Chart" of
27 https://perishablepress.com/stop-using-unsafe-characters-in-urls/
28 Reserved chars:
29 ; / ? : @ = &
30 -----> %3B %2F %3F %3A %40 %3D %26
31 Unsafe chars:
32 " < > # % { } | \ ^ ~ [ ] ` and SPACE/BLANK
33 ----> %22 %3C %3E %23 %25 %7B %7D %7C %5C %5E %7E %5B %5D %60 and %20
34 But the above conflicts with the reserved vs unreserved listings at
35 https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
36 Possibly more info: https://stackoverflow.com/questions/1547899/which-characters-make-a-url-invalid
37
38 Javascript already provides functions encodeURI() and encodeURIComponent(), see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
39 However, the set of chars they deal with only partially overlap with the set of chars that need encoding as per the RFC3986 for URIs and RFC1738 for URLs discussed at
40 https://perishablepress.com/stop-using-unsafe-characters-in-urls/
41 We want to handle all the characters listed as unsafe and reserved at https://perishablepress.com/stop-using-unsafe-characters-in-urls/
42 so we define and use our own conceptually equivalent methods for both existing JavaScript methods:
43 - makeURLSafe() for JavaScript's encodeURI() to make sure all unsafe characters in URLs are escaped by being URL encoded
44 - and makeURLComponentSafe() for JavaScript's encodeURIComponent() to additionally make sure all reserved characters in a URL portion are escaped by being URL encoded too
45
46 Function makeURLSafe() is passed a string that represents a URL and therefore only deals with characters that are unsafe in a URL and which therefore require escaping.
47 Function makeURLComponentSafe() deals with portions of a URL that when decoded need not represent a URL at all, for example data like inline templates passed in as a
48 URL query string's parameter values. As such makeURLComponentSafe() should escape both unsafe URL characters and characters that are reserved in URLs since reserved
49 characters in the query string part (as query param values representing data) may take on a different meaning from their reserved meaning in a URL context.
50*/
51
/**
 * URL-encodes one component of a URL (for example a query parameter value).
 *
 * Two classes of character are encoded:
 *  - UNSAFE characters, by delegating to makeURLSafe();
 *  - RESERVED characters ( ; / ? : @ = & ), which have a structural meaning in
 *    a URL but may appear in the component in a non-URL sense. For example, the
 *    inline template (ilt) parameter value could use '=' and '&' signs where
 *    these have XSLT rather than URL meanings.
 *
 * See the end of https://www.w3schools.com/jsref/jsref_replace.asp for using a
 * callback that receives each regex match in str.replace().
 *
 * NOTE(review): '+' is not in the encoded set. A server that decodes '+' in a
 * query string as a space would misread a literal '+' - confirm whether any
 * caller needs it encoded.
 *
 * @param {string} url_part - the raw component text to encode.
 * @param {*} [encode_percentages=1] - truthy to encode every '%' as "%25"; do
 *        this for data that has never been URL-encoded, such as inline
 *        templates. Pass 0 or false when url_part already contains %XX escapes
 *        that must not be double-encoded. Only an omitted (undefined) or null
 *        argument selects the default.
 * @returns {string} url_part with unsafe and reserved characters percent-encoded.
 */
function makeURLComponentSafe(url_part, encode_percentages) {
    // Optional-parameter default, see
    // https://stackoverflow.com/questions/12797118/how-can-i-declare-optional-function-parameters-in-javascript
    // A plain "encode_percentages || 1" would also overwrite an explicit 0 or
    // false, making it impossible for a caller to switch the behaviour off.
    if (encode_percentages === undefined || encode_percentages === null) {
        encode_percentages = 1;
    }

    var unsafe_chars_encoded = makeURLSafe(url_part, encode_percentages);

    // Now encode the characters that are reserved in URLs.
    return unsafe_chars_encoded.replace(/[;\/?:@=&]/g, function (reserved_char) {
        return urlEncodeChar(reserved_char);
    });
}
70
/**
 * URL-encodes the UNSAFE characters in a URL so that the URL is safe to send.
 *
 * The characters encoded are: space " < > # { } | \ ^ ~ [ ] `
 * ('~' is included and becomes "%7E", although some encoders, such as
 * https://meyerweb.com/eric/tools/dencoder/, leave '~' as it is.)
 * Reserved characters such as ; / ? : @ = & are left alone because they carry
 * their structural meaning in a full URL.
 *
 * @param {string} url - the URL to make safe.
 * @param {*} [encode_percentages=0] - truthy to encode EVERY '%' as "%25"; use
 *        this when url has never been URL-encoded. When falsy or omitted, url is
 *        taken to be already partly URL-encoded: valid %XX escapes are kept as
 *        they are, and only a stray '%' that is not followed by two hex digits
 *        is encoded, since it would otherwise leave a malformed escape in the
 *        result. (Optional-parameter idiom:
 *        https://stackoverflow.com/questions/12797118/how-can-i-declare-optional-function-parameters-in-javascript)
 * @returns {string} url with its unsafe characters percent-encoded.
 */
function makeURLSafe(url, encode_percentages) {
    var url_encoded = url;

    // Deal with '%' first, before any new %XX escapes are introduced below.
    if (encode_percentages) {
        url_encoded = url_encoded.replace(/%/g, "%25");
    } else {
        url_encoded = url_encoded.replace(/%(?![0-9A-Fa-f]{2})/g, "%25");
    }

    return url_encoded.replace(/[ "<>#{}|\\^~\[\]`]/g, function (unsafe_char) {
        return urlEncodeChar(unsafe_char);
    });
}
Note: See TracBrowser for help on using the repository browser.