1 | /** JavaScript file of utility functions.
|
---|
2 | * At present contains functions for sanitising of URLs,
|
---|
3 | * since tomcat 8+, being more compliant with URL/URI standards, is more strict about URLs.
|
---|
4 | */
|
---|
5 |
|
---|
/*
   Given a string consisting of a single character, returns its percent-encoded
   form: "%" followed by the character's code as two upper-case hex digits (%XX).
   For example " " gives "%20", "&" gives "%26" and TAB gives "%09".

   Only the first UTF-16 code unit of the string is looked at (charCodeAt(0)).
   Codes above 0xFF produce more than two hex digits, which is not a valid
   percent-escape, so this is only meant for ASCII characters.

   An empty string is returned unchanged (as ""), since there is nothing to encode.

   References:
   https://www.w3resource.com/javascript-exercises/javascript-string-exercise-27.php
   https://stackoverflow.com/questions/40100096/what-is-equivalent-php-chr-and-ord-functions-in-javascript
*/
function urlEncodeChar(single_char_string) {
    // charCodeAt(0) on "" is NaN, which would otherwise come out as "%NAN".
    if (single_char_string === "") {
        return "";
    }

    var hex = Number(single_char_string.charCodeAt(0)).toString(16).toUpperCase();

    // A percent-escape always has two hex digits, so codes below 0x10
    // (e.g. TAB = 9, LF = A) need a leading zero.
    if (hex.length < 2) {
        hex = "0" + hex;
    }

    return "%" + hex;
}
|
---|
22 |
|
---|
23 | /*
|
---|
24 | Tomcat 8 appears to be stricter in requiring unsafe and reserved chars
|
---|
25 | in URLs to be escaped with URL encoding
|
---|
26 | See section "Character Encoding Chart" of
|
---|
27 | https://perishablepress.com/stop-using-unsafe-characters-in-urls/
|
---|
28 | Reserved chars:
|
---|
29 | ; / ? : @ = &
|
---|
30 | -----> %3B %2F %3F %3A %40 %3D %26
|
---|
31 | Unsafe chars:
|
---|
32 | " < > # % { } | \ ^ ~ [ ] ` and SPACE/BLANK
|
---|
33 | ----> %22 %3C %3E %23 %25 %7B %7D %7C %5C %5E %7E %5B %5D %60 and %20
|
---|
34 | But the above conflicts with the reserved vs unreserved listings at
|
---|
35 | https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
|
---|
36 | Possibly more info: https://stackoverflow.com/questions/1547899/which-characters-make-a-url-invalid
|
---|
37 |
|
---|
38 | Javascript already provides functions encodeURI() and encodeURIComponent(), see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
|
---|
39 | However, the set of chars they deal with only partially overlap with the set of chars that need encoding as per the RFC3986 for URIs and RFC1738 for URLs discussed at
|
---|
40 | https://perishablepress.com/stop-using-unsafe-characters-in-urls/
|
---|
41 | We want to handle all the characters listed as unsafe and reserved at https://perishablepress.com/stop-using-unsafe-characters-in-urls/
|
---|
42 | so we define and use our own conceptually equivalent methods for both existing JavaScript methods:
|
---|
43 | - makeURLSafe() for JavaScript's encodeURI() to make sure all unsafe characters in URLs are escaped by being URL encoded
|
---|
44 | - and makeURLComponentSafe() for JavaScript's encodeURIComponent() to additionally make sure all reserved characters in a URL portion are escaped by being URL encoded too
|
---|
45 |
|
---|
46 | Function makeURLSafe() is passed a string that represents a URL and therefore only deals with characters that are unsafe in a URL and which therefore require escaping.
|
---|
47 | Function makeURLComponentSafe() deals with portions of a URL that when decoded need not represent a URL at all, for example data like inline templates passed in as a
|
---|
48 | URL query string's parameter values. As such makeURLComponentSafe() should escape both unsafe URL characters and characters that are reserved in URLs since reserved
|
---|
49 | characters in the query string part (as query param values representing data) may take on a different meaning from their reserved meaning in a URL context.
|
---|
50 | */
|
---|
51 |
|
---|
/* URL encodes both
   - UNSAFE characters to make the URL part safe, by calling makeURLSafe()
   - and RESERVED characters (characters that have reserved meanings within a URL:
     ; / ? : @ = &) to make the URL valid, since the url component parameter could
     use reserved characters in a non-URL sense. For example, the inline template (ilt)
     parameter value of a URL could use '=' and '&' signs where these would have XSLT
     rather than URL meanings.

   Parameters:
   - url_part: the string (a portion of a URL, e.g. a query parameter value) to encode.
   - encode_percentages: optional. When omitted (undefined or null) it defaults to 1,
     so any % in url_part is URL encoded too, e.g. do this for inline templates that
     haven't ever been encoded. Pass 0 or false if url_part is already partly URL
     encoded, so that its existing %XX escapes are not encoded a second time.

   Returns the encoded string.

   See end of https://www.w3schools.com/jsref/jsref_replace.asp to use a callback passing
   each captured element of a regex in str.replace()
   See https://stackoverflow.com/questions/12797118/how-can-i-declare-optional-function-parameters-in-javascript
   for optional parameters.
*/
function makeURLComponentSafe(url_part, encode_percentages) {
    // Apply the default only when the argument was not supplied. Using
    // "encode_percentages || 1" would also turn an explicit 0/false into 1,
    // making it impossible for a caller to switch % encoding off.
    var encode_percent_signs = encode_percentages;
    if (encode_percent_signs === undefined || encode_percent_signs === null) {
        encode_percent_signs = 1;
    }

    // First deal with the unsafe characters (and optionally %) ...
    var url_encoded = makeURLSafe(url_part, encode_percent_signs);

    // ... then with the reserved characters, none of which makeURLSafe() touches.
    url_encoded = url_encoded.replace(/[\;\/\?\:\@\=\&]/g, function(reserved_char) {
        return urlEncodeChar(reserved_char);
    });
    return url_encoded;
}
|
---|
70 |
|
---|
/*
   URL encodes the UNSAFE characters of the URL passed in, to make it safe:
   SPACE " < > # { } | \ ^ ~ [ ] `
   Each one is replaced by whatever urlEncodeChar() returns for it.

   Parameters:
   - url: the string representing the URL to make safe.
   - encode_percentages: optional, treated as false when omitted. When set to 1 (true),
     every % sign in url is also encoded (as %25). Leave it off when the url is already
     partly URL encoded, since its existing % signs then belong to escape sequences.
     See https://stackoverflow.com/questions/12797118/how-can-i-declare-optional-function-parameters-in-javascript

   Returns the encoded string.
*/
function makeURLSafe(url, encode_percentages) {
    // Omitted or otherwise falsy means: leave % signs alone.
    var escape_percent_signs = encode_percentages ? true : false;

    // % must be encoded first, before any %XX escapes get added below,
    // or else those new escapes would have their own % encoded as well.
    var safe_url = escape_percent_signs ? url.replace(/\%/g, "%25") : url;

    // ~ is part of the set, although some encoders
    // (e.g. https://meyerweb.com/eric/tools/dencoder/) leave ~ as it is.
    var unsafe_chars = /[\ \"\<\>\#\{\}\|\\^\~\[\]\`]/g;

    return safe_url.replace(unsafe_chars, function(unsafe_char) {
        return urlEncodeChar(unsafe_char);
    });
}
|
---|