source: main/trunk/greenstone2/runtime-src/src/recpt/securitytools.cpp@ 28899

Last change on this file since 28899 was 28899, checked in by ak19, 7 years ago

Third commit for security, for ensuring cgiargs macros are websafe. This time all the changes to the runtime action classes.

File size: 15.0 KB
Line 
1/**********************************************************************
2 *
3 * securitytools.cpp -- a C++ port of the required functions from the OWASP ESAPI for Java
4 * Copyright (C) 2014 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25/*
26 * https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet
27 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_encoder.html
28 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_default_encoder.html
29 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/_default_encoder_8cpp_source.html
30 *
31 * The OWASP-ESAPI for C++'s online API is outdated/different to the actual method definitions in the
32 * downloaded version of the code at runtime-src/packages/security/installed/include
33 * Further, most of the necessary methods in the ESAPI-for-C++ have not been implemented yet.
34 * The ESAPI-for-C, whose code is more complete, doesn't seem to have the same structure as the Java version.
35 *
36 * As a consequence, this file now contains custom Greenstone C++ functions that port the Java versions of the
37 * required methods from the OWASP for Java API at http://code.google.com/p/owasp-esapi-java/.
38 * In particular the codecs and the DefaultEncoder at
39 * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Fcodecs
40 * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Freference%253Fstate%253Dclosed
41*/
42
43//http://www.cplusplus.com/reference/cctype/isalnum/
44#include <ctype.h>
45#include <stdio.h>
46#include "securitytools.h"
47
// File-local flag, initialised to true. It is not read by any function visible
// in this file; presumably meant as a switch for turning the encoders on/off
// -- TODO confirm before relying on it.
static bool security_on = true;

// Forward declaration of the ASCII-only alphanumeric test defined at the end of this file.
// Currently unused: the encoders below call isalnum(int) from <ctype.h> instead.
bool isAlphaNumeric(const unsigned short c);
52
53/*
54 Rule 5 of the OWASP XSS cheat sheet states:
55 https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet#RULE_.235_-_URL_Escape_Before_Inserting_Untrusted_Data_into_HTML_URL_Parameter_Values
56
57 WARNING: Do not encode complete or relative URL's with URL encoding! If untrusted input is meant to be placed into
58 href, src or other URL-based attributes, it should be validated to make sure it does not point to an unexpected
59 protocol, especially Javascript links. URL's should then be encoded based on the context of display like any other
60 piece of data. For example, user driven URL's in HREF links should be attribute encoded. For example:
61
62 String userURL = request.getParameter( "userURL" )
63 boolean isValidURL = ESAPI.validator().isValidInput("URLContext", userURL, "URL", 255, false);
64 if (isValidURL) {
65 <a href="<%=encoder.encodeForHTMLAttribute(userURL)%>">link</a>
66 }
67
68 The following function is simpler than theirs as it only looks for any "javascript:" in the string. If present, it
69 returns false and the string should get URL encoded. Otherwise, the protocol is assumed to be valid and it returns true.
70*/
71bool isValidURLProtocol(const text_t& url) {
72 text_t::const_iterator here = url.begin();
73 text_t::const_iterator end = url.end();
74
75 if(findword(here, end, "javascript:") != end) {
76 return true;
77 }
78 return false;
79}
80
81
82text_t encodeForHTMLAttr(const text_t& in, const text_t& immuneChars) {
83 text_t out;
84 text_t::const_iterator here = in.begin();
85 text_t::const_iterator end = in.end();
86 while (here != end) {
87 out += encodeForHTML(immuneChars, *here); // IMMUNE_HTMLATTR by default
88 ++here;
89 }
90 return out;
91}
92
93text_t encodeForHTML(const text_t& in, const text_t& immuneChars) {
94 text_t out;
95 text_t::const_iterator here = in.begin();
96 text_t::const_iterator end = in.end();
97 while (here != end) {
98 out += encodeForHTML(immuneChars, *here); // IMMUNE_HTML by default
99 ++here;
100 }
101 return out;
102}
103
104text_t encodeForCSS(const text_t& in, const text_t& immuneChars) {
105 text_t out;
106 text_t::const_iterator here = in.begin();
107 text_t::const_iterator end = in.end();
108 while (here != end) {
109 out += encodeForCSS(immuneChars, *here); // IMMUNE_CSS by default
110 ++here;
111 }
112 return out;
113}
114
115
116text_t encodeForURL(const text_t& in, const text_t& immuneChars) {
117 text_t out;
118 text_t::const_iterator here = in.begin();
119 text_t::const_iterator end = in.end();
120 while (here != end) {
121 out += encodeForURL(immuneChars, *here); // IMMUNE_URL by default
122 ++here;
123 }
124 return out;
125}
126
127text_t encodeForJavascript(const text_t& in, const text_t& immuneChars, bool dmsafe) {
128 text_t out;
129 text_t::const_iterator here = in.begin();
130 text_t::const_iterator end = in.end();
131 while (here != end) {
132 out += encodeForJavascript(immuneChars, *here, dmsafe); // IMMUNE_JAVASCRIPT by default
133 ++here;
134 }
135 return out;
136}
137
138text_t encodeForSQL(const text_t& in, const text_t& immuneChars, const SQLMode mode) {
139 text_t out;
140 text_t::const_iterator here = in.begin();
141 text_t::const_iterator end = in.end();
142 while (here != end) {
143 out += encodeForSQL(immuneChars, *here, mode); // IMMUNE_SQL and STANDARD SQLMode by default
144 ++here;
145 }
146 return out;
147}
148
149
150/*
The encodeForURL() function here follows the same rules as Java's URLEncoder, since that is called
152by the OWASP-for-Java code when the OWASP project wishes to encode strings for URL contexts:
153http://docs.oracle.com/javase/6/docs/api/java/net/URLEncoder.html
154
155When encoding a String, the following rules apply:
156
157 The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same.
158 The special characters ".", "-", "*", and "_" remain the same.
159 The space character " " is converted into a plus sign "+".
160 All other characters are unsafe and are first converted into one or more bytes using some encoding scheme. Then each byte is represented by the 3-character string "%xy", where xy is the two-digit hexadecimal representation of the byte. The recommended encoding scheme to use is UTF-8. However, for compatibility reasons, if an encoding is not specified, then the default encoding of the platform is used.
161
162*/
163text_t encodeForURL(const text_t& immuneChars, const unsigned short in) {
164
165 text_t result = "";
166 text_t::const_iterator here = immuneChars.begin();
167 text_t::const_iterator end = immuneChars.end();
168
169 // Check if the character is in the list of chars immune to encoding
170 if(findchar(here, end, in) != end) {
171 result.push_back(in);
172 }
173
174 else if(isalnum((int)in)) {
175 result.push_back(in);
176 }
177
178 // for URLs, space becomes +
179 else if(in == ' ' ) {
180 result.push_back('+');
181 }
182
183 // all other chars converted to hexadecimal %XY
184 else {
185 char hex_char[4];
186 sprintf(hex_char,"%%%02X",in);
187 result = text_t(hex_char); // result += hex_char;
188 }
189
190 return result;
191}
192
193// encodes for both HTML and HTML attributes.
194// The chars in the immuneChars array determines which of the two this is
195// See http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
196text_t encodeForHTML(const text_t& immuneChars, const unsigned short in) {
197
198 text_t result = "";
199 text_t::const_iterator here = immuneChars.begin();
200 text_t::const_iterator end = immuneChars.end();
201
202 // Check if the character is in the list of chars immune to encoding
203 if(findchar(here, end, in) != end) {
204 result.push_back(in);
205 }
206
207 else if(isalnum((int)in)) {
208 result.push_back(in);
209 }
210
211 // check for illegal characters
212 // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
213 // 0x1f is the unit separator, an invisible character, 0x7f is the ascii control code for delete, not sure about 0x9f
214 // Encode all these as the UTF-8 replacement char ufffd, which is used to replace an unknown or unrepresentable character
215
216 else if ( ( in <= 0x1f && in != '\t' && in != '\n' && in != '\r' ) || ( in >= 0x7f && in <= 0x9f ) ) {
217 result = "&#x" + REPLACEMENT_HEX + ";";
218 // Let's entity encode this instead of returning it
219 //c = REPLACEMENT_CHAR;
220 }
221
222 // all other chars are to be converted to hexadecimal AB, then return the hex entity, which is of the form &#xAB;
223 else {
224 char hex_char[3];
225 sprintf(hex_char,"%02X",in);
226 result = "&#x" + text_t(hex_char) + ";";
227 }
228
229 return result;
230}
231
232// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/CSSCodec.java
233// return the hex and end in whitespace to terminate
234text_t encodeForCSS(const text_t& immuneChars, const unsigned short in) {
235
236 text_t result = "";
237 text_t::const_iterator here = immuneChars.begin();
238 text_t::const_iterator end = immuneChars.end();
239
240 // Check if the character is in the list of chars immune to encoding
241 if(findchar(here, end, in) != end) {
242 result.push_back(in);
243 }
244
245 else if(isalnum((int)in)) {
246 result.push_back(in);
247 }
248
249 // all other chars converted to hexadecimal AB, then return the hex entity, which is of the form &#xAB;
250 else {
251 char hex_char[3];
252 sprintf(hex_char,"%02X",in);
253 // return the hex and end in whitespace to terminate
254 result = "\\" + text_t(hex_char) + " ";
255 }
256
257 return result;
258}
259
260// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/JavaScriptCodec.java
261text_t encodeForJavascript(const text_t& immuneChars, const unsigned short in, bool dmsafe) {
262
263 text_t result = "";
264 text_t::const_iterator start = immuneChars.begin();
265 text_t::const_iterator end = immuneChars.end();
266
267 // Check if the character is in the list of chars immune to encoding
268 if(findchar(start, end, in) != end) {
269 result.push_back(in);
270 }
271
272 else if(isalnum((int)in)) {
273 result.push_back(in);
274 }
275
276 // Do not use these shortcuts as they can be used to break out of a context
277 // if ( ch == 0x00 ) return "\\0";
278 // if ( ch == 0x08 ) return "\\b";
279 // if ( ch == 0x09 ) return "\\t";
280 // if ( ch == 0x0a ) return "\\n";
281 // if ( ch == 0x0b ) return "\\v";
282 // if ( ch == 0x0c ) return "\\f";
283 // if ( ch == 0x0d ) return "\\r";
284 // if ( ch == 0x22 ) return "\\\"";
285 // if ( ch == 0x27 ) return "\\'";
286 // if ( ch == 0x5c ) return "\\\\";
287
288
289 // encode up to 256 with hexadecimal \\xHH, otherwise encode with \\uHHHH
290 else {
291
292 // encode up to 256 with \\xHH
293 if(in < 256) {
294 char hex_char[3];
295 sprintf(hex_char,"%02X",in);
296
297 if(dmsafe) { // double escape backslashes for macro files
298 result = "\\\\x" + text_t(hex_char);
299 } else {
300 result = "\\x" + text_t(hex_char);
301 }
302 }
303 // otherwise encode with \\uHHHH
304 else {
305 char hex_char[5];
306 sprintf(hex_char,"%04X",in);
307 if(dmsafe) { // double escape backslashes for macro files
308 result = "\\\\u" + text_t(hex_char);
309 } else {
310 result = "\\u" + text_t(hex_char);
311 }
312 }
313
314 }
315
316 return result;
317}
318
319
320/*
321
 C++ port of the OWASP-ESAPI codec for MySQL. Not sure if this is the same for SQLite
323
324http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/MySQLCodec.java
325 Defense Option 3 of https://www.owasp.org/index.php/SQL_Injection_Prevention_Cheat_Sheet
326 which states:
327 "This technique works like this. Each DBMS supports one or more character escaping schemes
328 specific to certain kinds of queries. If you then escape all user supplied input using the
329 proper escaping scheme for the database you are using, the DBMS will not confuse that input
330 with SQL code written by the developer, thus avoiding any possible SQL injection vulnerabilities."
331
332http://www.php.net/manual/en/mysqli.real-escape-string.php
333http://www.php.net/manual/en/function.mysql-real-escape-string.php
334http://www.php.net/manual/en/function.sqlite-escape-string.php
335http://stackoverflow.com/questions/8838913/difference-between-mysql-sqlite-etc-databases
336http://stackoverflow.com/questions/633245/sql-escape-with-sqlite-in-c-sharp
337
338*/
339text_t encodeForSQL(const text_t& immuneChars, const unsigned short in, const SQLMode mode) {
340
341 text_t result = "";
342 text_t::const_iterator start = immuneChars.begin();
343 text_t::const_iterator end = immuneChars.end();
344
345 // Check if the character is in the list of chars immune to encoding
346 if(findchar(start, end, in) != end) {
347 result.push_back(in);
348 }
349
350 else if(isalnum((int)in)) {
351 result.push_back(in);
352 }
353
354 // switch( mode ) {
355 // case ANSI: return encodeCharacterANSI( c );
356 // case STANDARD: return encodeCharacterMySQL( c );
357 // }
358
359 if(mode == STANDARD) { // encodeCharacterMySQL: Encode a character suitable for MySQL
360
361 if ( in == 0x00 ) result = "\\0";
362 else if ( in == 0x08 ) result = "\\b";
363 else if ( in == 0x09 ) result = "\\t";
364 else if ( in == 0x0a ) result = "\\n";
365 else if ( in == 0x0d ) result = "\\r";
366 else if ( in == 0x1a ) result = "\\Z";
367 else if ( in == 0x22 ) result = "\\\"";
368 else if ( in == 0x25 ) result = "\\%";
369 else if ( in == 0x27 ) result = "\\'";
370 else if ( in == 0x5c ) result = "\\\\";
371 else if ( in == 0x5f ) result = "\\_";
372 else {
373 result = "\\";
374 result.push_back(in);
375 }
376
377 } else { // mode is ANSI, encodeCharacterANSI:
378
379 /* Encode for ANSI SQL.
380 Apostrophe is encoded
381 Bug ###: In ANSI Mode Strings can also be passed in using the quotation.
382 In ANSI_QUOTES mode a quotation is considered to be an identifier, thus
383 cannot be used at all in a value and will be dropped completely.
384 returns a string encoded to standards of MySQL running in ANSI mode
385 */
386
387 if ( in == '\'' ) result = "\'\'";
388 else if ( in == '\"' ) result = "";
389 else result.push_back(in);
390
391 }
392
393 return result;
394}
395
396// Unused at present.
397// See Codec.hex[] initialization and Codec.getHexForNonAlphanumeric(c) and Codec.toHex(c)
398// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/Codec.java
399// See Integer.toHexString()
400// http://docs.oracle.com/javase/6/docs/api/java/lang/Integer.html#toHexString%28int%29
401// http://stackoverflow.com/questions/3370004/what-is-static-block-in-c-or-c
/**
 * ASCII-only alphanumeric test, independent of the current locale.
 *
 * @param c the character (16-bit code unit) to classify.
 * @return true if c is one of 0-9, A-Z or a-z; false for every other value.
 */
bool isAlphaNumeric(const unsigned short c) {
  const bool digit = (c >= 0x30 && c <= 0x39); // 0 - 9
  const bool upper = (c >= 0x41 && c <= 0x5A); // A - Z
  const bool lower = (c >= 0x61 && c <= 0x7A); // a - z

  // All three ranges lie below 0x7B, so no separate upper-bound check is needed.
  return digit || upper || lower;
}
Note: See TracBrowser for help on using the repository browser.