1 | /**********************************************************************
|
---|
2 | *
|
---|
3 | * securitytools.cpp -- a C++ port of the required functions from the OWASP ESAPI for Java
|
---|
4 | * Copyright (C) 2014 The New Zealand Digital Library Project
|
---|
5 | *
|
---|
6 | * A component of the Greenstone digital library software
|
---|
7 | * from the New Zealand Digital Library Project at the
|
---|
8 | * University of Waikato, New Zealand.
|
---|
9 | *
|
---|
10 | * This program is free software; you can redistribute it and/or modify
|
---|
11 | * it under the terms of the GNU General Public License as published by
|
---|
12 | * the Free Software Foundation; either version 2 of the License, or
|
---|
13 | * (at your option) any later version.
|
---|
14 | *
|
---|
15 | * This program is distributed in the hope that it will be useful,
|
---|
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
18 | * GNU General Public License for more details.
|
---|
19 | *
|
---|
20 | * You should have received a copy of the GNU General Public License
|
---|
21 | * along with this program; if not, write to the Free Software
|
---|
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
23 | *
|
---|
24 | *********************************************************************/
|
---|
25 | /*
|
---|
26 | * https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet
|
---|
27 | * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_encoder.html
|
---|
28 | * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_default_encoder.html
|
---|
29 | * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/_default_encoder_8cpp_source.html
|
---|
30 | *
|
---|
31 | * The OWASP-ESAPI for C++'s online API is outdated/different to the actual method definitions in the
|
---|
32 | * downloaded version of the code at runtime-src/packages/security/installed/include
|
---|
33 | * Further, most of the necessary methods in the ESAPI-for-C++ have not been implemented yet.
|
---|
34 | * The ESAPI-for-C, whose code is more complete, doesn't seem to have the same structure as the Java version.
|
---|
35 | *
|
---|
36 | * As a consequence, this file now contains custom Greenstone C++ functions that port the Java versions of the
|
---|
37 | * required methods from the OWASP for Java API at http://code.google.com/p/owasp-esapi-java/.
|
---|
38 | * In particular the codecs and the DefaultEncoder at
|
---|
39 | * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Fcodecs
|
---|
40 | * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Freference%253Fstate%253Dclosed
|
---|
41 | */
|
---|
42 |
|
---|
43 | //http://www.cplusplus.com/reference/cctype/isalnum/
|
---|
44 | #include <ctype.h>
|
---|
45 | #include <stdio.h>
|
---|
46 | #include "securitytools.h"
|
---|
47 |
|
---|
// File-scope flag, initialised to true. None of the functions visible in this
// part of the file read or write it.
// NOTE(review): presumably a master switch for the encoders - confirm its use.
static bool security_on = true;

// Forward declaration of the hand-written alphanumeric test defined further
// down in this file. It is currently unused: the encoders call isalnum(int)
// from <ctype.h> instead.
bool isAlphaNumeric(const unsigned short c);
|
---|
52 |
|
---|
53 | /*
|
---|
54 | Rule 5 of the OWASP XSS cheat sheet states:
|
---|
55 | https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet#RULE_.235_-_URL_Escape_Before_Inserting_Untrusted_Data_into_HTML_URL_Parameter_Values
|
---|
56 |
|
---|
57 | WARNING: Do not encode complete or relative URL's with URL encoding! If untrusted input is meant to be placed into
|
---|
58 | href, src or other URL-based attributes, it should be validated to make sure it does not point to an unexpected
|
---|
59 | protocol, especially Javascript links. URL's should then be encoded based on the context of display like any other
|
---|
60 | piece of data. For example, user driven URL's in HREF links should be attribute encoded. For example:
|
---|
61 |
|
---|
62 | String userURL = request.getParameter( "userURL" )
|
---|
63 | boolean isValidURL = ESAPI.validator().isValidInput("URLContext", userURL, "URL", 255, false);
|
---|
64 | if (isValidURL) {
|
---|
65 | <a href="<%=encoder.encodeForHTMLAttribute(userURL)%>">link</a>
|
---|
66 | }
|
---|
67 |
|
---|
68 | The following function is simpler than theirs as it only looks for any "javascript:" in the string. If present, it
|
---|
69 | returns false and the string should get URL encoded. Otherwise, the protocol is assumed to be valid and it returns true.
|
---|
70 | */
|
---|
71 | bool isValidURLProtocol(const text_t& url) {
|
---|
72 | text_t::const_iterator here = url.begin();
|
---|
73 | text_t::const_iterator end = url.end();
|
---|
74 |
|
---|
75 | if(findword(here, end, "javascript:") != end) {
|
---|
76 | return true;
|
---|
77 | }
|
---|
78 | return false;
|
---|
79 | }
|
---|
80 |
|
---|
81 |
|
---|
82 | text_t encodeForHTMLAttr(const text_t& in, const text_t& immuneChars) {
|
---|
83 | text_t out;
|
---|
84 | text_t::const_iterator here = in.begin();
|
---|
85 | text_t::const_iterator end = in.end();
|
---|
86 | while (here != end) {
|
---|
87 | out += encodeForHTML(immuneChars, *here); // IMMUNE_HTMLATTR by default
|
---|
88 | ++here;
|
---|
89 | }
|
---|
90 | return out;
|
---|
91 | }
|
---|
92 |
|
---|
93 | text_t encodeForHTML(const text_t& in, const text_t& immuneChars) {
|
---|
94 | text_t out;
|
---|
95 | text_t::const_iterator here = in.begin();
|
---|
96 | text_t::const_iterator end = in.end();
|
---|
97 | while (here != end) {
|
---|
98 | out += encodeForHTML(immuneChars, *here); // IMMUNE_HTML by default
|
---|
99 | ++here;
|
---|
100 | }
|
---|
101 | return out;
|
---|
102 | }
|
---|
103 |
|
---|
104 | text_t encodeForCSS(const text_t& in, const text_t& immuneChars) {
|
---|
105 | text_t out;
|
---|
106 | text_t::const_iterator here = in.begin();
|
---|
107 | text_t::const_iterator end = in.end();
|
---|
108 | while (here != end) {
|
---|
109 | out += encodeForCSS(immuneChars, *here); // IMMUNE_CSS by default
|
---|
110 | ++here;
|
---|
111 | }
|
---|
112 | return out;
|
---|
113 | }
|
---|
114 |
|
---|
115 |
|
---|
116 | text_t encodeForURL(const text_t& in, const text_t& immuneChars) {
|
---|
117 | text_t out;
|
---|
118 | text_t::const_iterator here = in.begin();
|
---|
119 | text_t::const_iterator end = in.end();
|
---|
120 | while (here != end) {
|
---|
121 | out += encodeForURL(immuneChars, *here); // IMMUNE_URL by default
|
---|
122 | ++here;
|
---|
123 | }
|
---|
124 | return out;
|
---|
125 | }
|
---|
126 |
|
---|
127 | text_t encodeForJavascript(const text_t& in, const text_t& immuneChars) {
|
---|
128 | text_t out;
|
---|
129 | text_t::const_iterator here = in.begin();
|
---|
130 | text_t::const_iterator end = in.end();
|
---|
131 | while (here != end) {
|
---|
132 | out += encodeForJavascript(immuneChars, *here); // IMMUNE_JAVASCRIPT by default
|
---|
133 | ++here;
|
---|
134 | }
|
---|
135 | return out;
|
---|
136 | }
|
---|
137 |
|
---|
138 | text_t encodeForMySQL(const text_t& in, const text_t& immuneChars, const SQLMode mode) {
|
---|
139 | text_t out;
|
---|
140 | text_t::const_iterator here = in.begin();
|
---|
141 | text_t::const_iterator end = in.end();
|
---|
142 | while (here != end) {
|
---|
143 | out += encodeForMySQL(immuneChars, *here, mode); // IMMUNE_SQL and STANDARD SQLMode by default
|
---|
144 | ++here;
|
---|
145 | }
|
---|
146 | return out;
|
---|
147 | }
|
---|
148 |
|
---|
149 |
|
---|
150 | /*
|
---|
151 | The encodeForURL() function here follows the same rules as Java's URLEncoder, since that is called
|
---|
152 | by the OWASP-for-Java code when the OWASP project wishes to encode strings for URL contexts:
|
---|
153 | http://docs.oracle.com/javase/6/docs/api/java/net/URLEncoder.html
|
---|
154 |
|
---|
155 | When encoding a String, the following rules apply:
|
---|
156 |
|
---|
157 | The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same.
|
---|
158 | The special characters ".", "-", "*", and "_" remain the same.
|
---|
159 | The space character " " is converted into a plus sign "+".
|
---|
160 | All other characters are unsafe and are first converted into one or more bytes using some encoding scheme. Then each byte is represented by the 3-character string "%xy", where xy is the two-digit hexadecimal representation of the byte. The recommended encoding scheme to use is UTF-8. However, for compatibility reasons, if an encoding is not specified, then the default encoding of the platform is used.
|
---|
161 |
|
---|
162 | */
|
---|
163 | text_t encodeForURL(const text_t& immuneChars, const unsigned short in) {
|
---|
164 |
|
---|
165 | text_t result = "";
|
---|
166 | text_t::const_iterator here = immuneChars.begin();
|
---|
167 | text_t::const_iterator end = immuneChars.end();
|
---|
168 |
|
---|
169 | // Check if the character is in the list of chars immune to encoding
|
---|
170 | if(findchar(here, end, in) != end) {
|
---|
171 | result.push_back(in);
|
---|
172 | }
|
---|
173 |
|
---|
174 | else if(isalnum((int)in)) {
|
---|
175 | result.push_back(in);
|
---|
176 | }
|
---|
177 |
|
---|
178 | // for URLs, space becomes +
|
---|
179 | else if(in == ' ' ) {
|
---|
180 | result.push_back('+');
|
---|
181 | }
|
---|
182 |
|
---|
183 | // all other chars converted to hexadecimal %XY
|
---|
184 | else {
|
---|
185 | char hex_char[4];
|
---|
186 | sprintf(hex_char,"%%%02X",in);
|
---|
187 | result = text_t(hex_char); // result += hex_char;
|
---|
188 | }
|
---|
189 |
|
---|
190 | return result;
|
---|
191 | }
|
---|
192 |
|
---|
193 | // encodes for both HTML and HTML attributes.
|
---|
194 | // The chars in the immuneChars array determines which of the two this is
|
---|
195 | // See http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
|
---|
196 | text_t encodeForHTML(const text_t& immuneChars, const unsigned short in) {
|
---|
197 |
|
---|
198 | text_t result = "";
|
---|
199 | text_t::const_iterator here = immuneChars.begin();
|
---|
200 | text_t::const_iterator end = immuneChars.end();
|
---|
201 |
|
---|
202 | // Check if the character is in the list of chars immune to encoding
|
---|
203 | if(findchar(here, end, in) != end) {
|
---|
204 | result.push_back(in);
|
---|
205 | }
|
---|
206 |
|
---|
207 | else if(isalnum((int)in)) {
|
---|
208 | result.push_back(in);
|
---|
209 | }
|
---|
210 |
|
---|
211 | // check for illegal characters
|
---|
212 | // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
|
---|
213 | // 0x1f is the unit separator, an invisible character, 0x7f is the ascii control code for delete, not sure about 0x9f
|
---|
214 | // Encode all these as the UTF-8 replacement char ufffd, which is used to replace an unknown or unrepresentable character
|
---|
215 |
|
---|
216 | else if ( ( in <= 0x1f && in != '\t' && in != '\n' && in != '\r' ) || ( in >= 0x7f && in <= 0x9f ) ) {
|
---|
217 | result = "&#x" + REPLACEMENT_HEX + ";";
|
---|
218 | // Let's entity encode this instead of returning it
|
---|
219 | //c = REPLACEMENT_CHAR;
|
---|
220 | }
|
---|
221 |
|
---|
222 | // all other chars are to be converted to hexadecimal AB, then return the hex entity, which is of the form «
|
---|
223 | else {
|
---|
224 | char hex_char[3];
|
---|
225 | sprintf(hex_char,"%02X",in);
|
---|
226 | result = "&#x" + text_t(hex_char) + ";";
|
---|
227 | }
|
---|
228 |
|
---|
229 | return result;
|
---|
230 | }
|
---|
231 |
|
---|
232 | // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/CSSCodec.java
|
---|
233 | // return the hex and end in whitespace to terminate
|
---|
234 | text_t encodeForCSS(const text_t& immuneChars, const unsigned short in) {
|
---|
235 |
|
---|
236 | text_t result = "";
|
---|
237 | text_t::const_iterator here = immuneChars.begin();
|
---|
238 | text_t::const_iterator end = immuneChars.end();
|
---|
239 |
|
---|
240 | // Check if the character is in the list of chars immune to encoding
|
---|
241 | if(findchar(here, end, in) != end) {
|
---|
242 | result.push_back(in);
|
---|
243 | }
|
---|
244 |
|
---|
245 | else if(isalnum((int)in)) {
|
---|
246 | result.push_back(in);
|
---|
247 | }
|
---|
248 |
|
---|
249 | // all other chars converted to hexadecimal AB, then return the hex entity, which is of the form «
|
---|
250 | else {
|
---|
251 | char hex_char[3];
|
---|
252 | sprintf(hex_char,"%02X",in);
|
---|
253 | // return the hex and end in whitespace to terminate
|
---|
254 | result = "\\" + text_t(hex_char) + " ";
|
---|
255 | }
|
---|
256 |
|
---|
257 | return result;
|
---|
258 | }
|
---|
259 |
|
---|
260 | // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/JavaScriptCodec.java
|
---|
261 | text_t encodeForJavascript(const text_t& immuneChars, const unsigned short in) {
|
---|
262 |
|
---|
263 | text_t result = "";
|
---|
264 | text_t::const_iterator start = immuneChars.begin();
|
---|
265 | text_t::const_iterator end = immuneChars.end();
|
---|
266 |
|
---|
267 | // Check if the character is in the list of chars immune to encoding
|
---|
268 | if(findchar(start, end, in) != end) {
|
---|
269 | result.push_back(in);
|
---|
270 | }
|
---|
271 |
|
---|
272 | else if(isalnum((int)in)) {
|
---|
273 | result.push_back(in);
|
---|
274 | }
|
---|
275 |
|
---|
276 | // Do not use these shortcuts as they can be used to break out of a context
|
---|
277 | // if ( ch == 0x00 ) return "\\0";
|
---|
278 | // if ( ch == 0x08 ) return "\\b";
|
---|
279 | // if ( ch == 0x09 ) return "\\t";
|
---|
280 | // if ( ch == 0x0a ) return "\\n";
|
---|
281 | // if ( ch == 0x0b ) return "\\v";
|
---|
282 | // if ( ch == 0x0c ) return "\\f";
|
---|
283 | // if ( ch == 0x0d ) return "\\r";
|
---|
284 | // if ( ch == 0x22 ) return "\\\"";
|
---|
285 | // if ( ch == 0x27 ) return "\\'";
|
---|
286 | // if ( ch == 0x5c ) return "\\\\";
|
---|
287 |
|
---|
288 |
|
---|
289 | // encode up to 256 with hexadecimal \\xHH, otherwise encode with \\uHHHH
|
---|
290 | else {
|
---|
291 |
|
---|
292 | // encode up to 256 with \\xHH
|
---|
293 | if(in < 256) {
|
---|
294 | char hex_char[3];
|
---|
295 | sprintf(hex_char,"%02X",in);
|
---|
296 | result = "\\x" + text_t(hex_char);
|
---|
297 | }
|
---|
298 | // otherwise encode with \\uHHHH
|
---|
299 | else {
|
---|
300 | char hex_char[5];
|
---|
301 | sprintf(hex_char,"%04X",in);
|
---|
302 | result = "\\u" + text_t(hex_char);
|
---|
303 | }
|
---|
304 |
|
---|
305 | }
|
---|
306 |
|
---|
307 | return result;
|
---|
308 | }
|
---|
309 |
|
---|
310 |
|
---|
311 | /*
|
---|
312 | http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/MySQLCodec.java
|
---|
313 | Defense Option 3 of https://www.owasp.org/index.php/SQL_Injection_Prevention_Cheat_Sheet
|
---|
314 | which states:
|
---|
315 | "This technique works like this. Each DBMS supports one or more character escaping schemes
|
---|
316 | specific to certain kinds of queries. If you then escape all user supplied input using the
|
---|
317 | proper escaping scheme for the database you are using, the DBMS will not confuse that input
|
---|
318 | with SQL code written by the developer, thus avoiding any possible SQL injection vulnerabilities."
|
---|
319 |
|
---|
320 | http://www.php.net/manual/en/mysqli.real-escape-string.php
|
---|
321 | http://www.php.net/manual/en/function.mysql-real-escape-string.php
|
---|
322 | http://www.php.net/manual/en/function.sqlite-escape-string.php
|
---|
323 | http://stackoverflow.com/questions/8838913/difference-between-mysql-sqlite-etc-databases
|
---|
324 | http://stackoverflow.com/questions/633245/sql-escape-with-sqlite-in-c-sharp
|
---|
325 |
|
---|
326 | */
|
---|
327 | text_t encodeForMySQL(const text_t& immuneChars, const unsigned short in, const SQLMode mode) {
|
---|
328 |
|
---|
329 | text_t result = "";
|
---|
330 | text_t::const_iterator start = immuneChars.begin();
|
---|
331 | text_t::const_iterator end = immuneChars.end();
|
---|
332 |
|
---|
333 | // Check if the character is in the list of chars immune to encoding
|
---|
334 | if(findchar(start, end, in) != end) {
|
---|
335 | result.push_back(in);
|
---|
336 | }
|
---|
337 |
|
---|
338 | else if(isalnum((int)in)) {
|
---|
339 | result.push_back(in);
|
---|
340 | }
|
---|
341 |
|
---|
342 | // switch( mode ) {
|
---|
343 | // case ANSI: return encodeCharacterANSI( c );
|
---|
344 | // case STANDARD: return encodeCharacterMySQL( c );
|
---|
345 | // }
|
---|
346 |
|
---|
347 | if(mode == STANDARD) { // encodeCharacterMySQL: Encode a character suitable for MySQL
|
---|
348 |
|
---|
349 | if ( in == 0x00 ) result = "\\0";
|
---|
350 | else if ( in == 0x08 ) result = "\\b";
|
---|
351 | else if ( in == 0x09 ) result = "\\t";
|
---|
352 | else if ( in == 0x0a ) result = "\\n";
|
---|
353 | else if ( in == 0x0d ) result = "\\r";
|
---|
354 | else if ( in == 0x1a ) result = "\\Z";
|
---|
355 | else if ( in == 0x22 ) result = "\\\"";
|
---|
356 | else if ( in == 0x25 ) result = "\\%";
|
---|
357 | else if ( in == 0x27 ) result = "\\'";
|
---|
358 | else if ( in == 0x5c ) result = "\\\\";
|
---|
359 | else if ( in == 0x5f ) result = "\\_";
|
---|
360 | else {
|
---|
361 | result = "\\";
|
---|
362 | result.push_back(in);
|
---|
363 | }
|
---|
364 |
|
---|
365 | } else { // mode is ANSI, encodeCharacterANSI:
|
---|
366 |
|
---|
367 | /* Encode for ANSI SQL.
|
---|
368 | Apostrophe is encoded
|
---|
369 | Bug ###: In ANSI Mode Strings can also be passed in using the quotation.
|
---|
370 | In ANSI_QUOTES mode a quotation is considered to be an identifier, thus
|
---|
371 | cannot be used at all in a value and will be dropped completely.
|
---|
372 | returns a string encoded to standards of MySQL running in ANSI mode
|
---|
373 | */
|
---|
374 |
|
---|
375 | if ( in == '\'' ) result = "\'\'";
|
---|
376 | else if ( in == '\"' ) result = "";
|
---|
377 | else result.push_back(in);
|
---|
378 |
|
---|
379 | }
|
---|
380 |
|
---|
381 | return result;
|
---|
382 | }
|
---|
383 |
|
---|
// See Codec.hex[] initialization and Codec.getHexForNonAlphanumeric(c) and Codec.toHex(c)
// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/Codec.java
// See Integer.toHexString()
// http://docs.oracle.com/javase/6/docs/api/java/lang/Integer.html#toHexString%28int%29
// http://stackoverflow.com/questions/3370004/what-is-static-block-in-c-or-c
//
// Returns true only when c is one of the code values 0x30-0x39 (0 - 9),
// 0x41-0x5A (A - Z) or 0x61-0x7A (a - z); every other 16-bit value,
// including everything from 0xFF upwards, yields false.
bool isAlphaNumeric(const unsigned short c) {
  const bool isDigit = (c >= 0x30) && (c <= 0x39);
  const bool isUpper = (c >= 0x41) && (c <= 0x5A);
  const bool isLower = (c >= 0x61) && (c <= 0x7A);
  return isDigit || isUpper || isLower;
}
|
---|