source: main/trunk/greenstone2/runtime-src/src/recpt/securitytools.cpp@ 28898

Last change on this file since 28898 was 28898, checked in by ak19, 10 years ago
  1. The cgiargq query variable is now no longer escaped in the 3 simply or large forms that use it. fqv and other js escaped fields are unchanged, since the jssafe now ensures that backslashes are escaped for macro files, so these resolve correctly in query.dm. 2. securitytools.cpp and .h updated to additionally escape back slashes for macro files when javascript escaping. This is done by default, since jssafe variants of cgiargs are all that are used, and they're used in macro files. 3. Encoded versions of decodedcompressedoptions are now used in all macro files. They're always used in attributes, so the attrsafe version which is set in receptionist.cpp is used.
File size: 14.9 KB
Line 
1/**********************************************************************
2 *
3 * securitytools.cpp -- a C++ port of the required functions from the OWASP ESAPI for Java
4 * Copyright (C) 2014 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25/*
26 * https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet
27 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_encoder.html
28 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_default_encoder.html
29 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/_default_encoder_8cpp_source.html
30 *
31 * The OWASP-ESAPI for C++'s online API is outdated/different to the actual method definitions in the
32 * downloaded version of the code at runtime-src/packages/security/installed/include
33 * Further, most of the necessary methods in the ESAPI-for-C++ have not been implemented yet.
34 * The ESAPI-for-C, whose code is more complete, doesn't seem to have the same structure as the Java version.
35 *
36 * As a consequence, this file now contains custom Greenstone C++ functions that port the Java versions of the
37 * required methods from the OWASP for Java API at http://code.google.com/p/owasp-esapi-java/.
38 * In particular the codecs and the DefaultEncoder at
39 * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Fcodecs
40 * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Freference%253Fstate%253Dclosed
41*/
42
43//http://www.cplusplus.com/reference/cctype/isalnum/
44#include <ctype.h>
45#include <stdio.h>
46#include "securitytools.h"
47
48static bool security_on = true;
49
// Function prototype - currently unused. The encoders call isalnum(int) from <ctype.h> instead.
51bool isAlphaNumeric(const unsigned short c);
52
53/*
54 Rule 5 of the OWASP XSS cheat sheet states:
55 https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet#RULE_.235_-_URL_Escape_Before_Inserting_Untrusted_Data_into_HTML_URL_Parameter_Values
56
57 WARNING: Do not encode complete or relative URL's with URL encoding! If untrusted input is meant to be placed into
58 href, src or other URL-based attributes, it should be validated to make sure it does not point to an unexpected
59 protocol, especially Javascript links. URL's should then be encoded based on the context of display like any other
60 piece of data. For example, user driven URL's in HREF links should be attribute encoded. For example:
61
62 String userURL = request.getParameter( "userURL" )
63 boolean isValidURL = ESAPI.validator().isValidInput("URLContext", userURL, "URL", 255, false);
64 if (isValidURL) {
65 <a href="<%=encoder.encodeForHTMLAttribute(userURL)%>">link</a>
66 }
67
68 The following function is simpler than theirs as it only looks for any "javascript:" in the string. If present, it
69 returns false and the string should get URL encoded. Otherwise, the protocol is assumed to be valid and it returns true.
70*/
71bool isValidURLProtocol(const text_t& url) {
72 text_t::const_iterator here = url.begin();
73 text_t::const_iterator end = url.end();
74
75 if(findword(here, end, "javascript:") != end) {
76 return true;
77 }
78 return false;
79}
80
81
82text_t encodeForHTMLAttr(const text_t& in, const text_t& immuneChars) {
83 text_t out;
84 text_t::const_iterator here = in.begin();
85 text_t::const_iterator end = in.end();
86 while (here != end) {
87 out += encodeForHTML(immuneChars, *here); // IMMUNE_HTMLATTR by default
88 ++here;
89 }
90 return out;
91}
92
93text_t encodeForHTML(const text_t& in, const text_t& immuneChars) {
94 text_t out;
95 text_t::const_iterator here = in.begin();
96 text_t::const_iterator end = in.end();
97 while (here != end) {
98 out += encodeForHTML(immuneChars, *here); // IMMUNE_HTML by default
99 ++here;
100 }
101 return out;
102}
103
104text_t encodeForCSS(const text_t& in, const text_t& immuneChars) {
105 text_t out;
106 text_t::const_iterator here = in.begin();
107 text_t::const_iterator end = in.end();
108 while (here != end) {
109 out += encodeForCSS(immuneChars, *here); // IMMUNE_CSS by default
110 ++here;
111 }
112 return out;
113}
114
115
116text_t encodeForURL(const text_t& in, const text_t& immuneChars) {
117 text_t out;
118 text_t::const_iterator here = in.begin();
119 text_t::const_iterator end = in.end();
120 while (here != end) {
121 out += encodeForURL(immuneChars, *here); // IMMUNE_URL by default
122 ++here;
123 }
124 return out;
125}
126
127text_t encodeForJavascript(const text_t& in, const text_t& immuneChars, bool dmsafe) {
128 text_t out;
129 text_t::const_iterator here = in.begin();
130 text_t::const_iterator end = in.end();
131 while (here != end) {
132 out += encodeForJavascript(immuneChars, *here, dmsafe); // IMMUNE_JAVASCRIPT by default
133 ++here;
134 }
135 return out;
136}
137
138
139text_t encodeForMySQL(const text_t& in, const text_t& immuneChars, const SQLMode mode) {
140 text_t out;
141 text_t::const_iterator here = in.begin();
142 text_t::const_iterator end = in.end();
143 while (here != end) {
144 out += encodeForMySQL(immuneChars, *here, mode); // IMMUNE_SQL and STANDARD SQLMode by default
145 ++here;
146 }
147 return out;
148}
149
150
151/*
The encodeForURL() function here follows the same rules as Java's URLEncoder, since that is called
153by the OWASP-for-Java code when the OWASP project wishes to encode strings for URL contexts:
154http://docs.oracle.com/javase/6/docs/api/java/net/URLEncoder.html
155
156When encoding a String, the following rules apply:
157
158 The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same.
159 The special characters ".", "-", "*", and "_" remain the same.
160 The space character " " is converted into a plus sign "+".
161 All other characters are unsafe and are first converted into one or more bytes using some encoding scheme. Then each byte is represented by the 3-character string "%xy", where xy is the two-digit hexadecimal representation of the byte. The recommended encoding scheme to use is UTF-8. However, for compatibility reasons, if an encoding is not specified, then the default encoding of the platform is used.
162
163*/
164text_t encodeForURL(const text_t& immuneChars, const unsigned short in) {
165
166 text_t result = "";
167 text_t::const_iterator here = immuneChars.begin();
168 text_t::const_iterator end = immuneChars.end();
169
170 // Check if the character is in the list of chars immune to encoding
171 if(findchar(here, end, in) != end) {
172 result.push_back(in);
173 }
174
175 else if(isalnum((int)in)) {
176 result.push_back(in);
177 }
178
179 // for URLs, space becomes +
180 else if(in == ' ' ) {
181 result.push_back('+');
182 }
183
184 // all other chars converted to hexadecimal %XY
185 else {
186 char hex_char[4];
187 sprintf(hex_char,"%%%02X",in);
188 result = text_t(hex_char); // result += hex_char;
189 }
190
191 return result;
192}
193
194// encodes for both HTML and HTML attributes.
195// The chars in the immuneChars array determines which of the two this is
196// See http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
197text_t encodeForHTML(const text_t& immuneChars, const unsigned short in) {
198
199 text_t result = "";
200 text_t::const_iterator here = immuneChars.begin();
201 text_t::const_iterator end = immuneChars.end();
202
203 // Check if the character is in the list of chars immune to encoding
204 if(findchar(here, end, in) != end) {
205 result.push_back(in);
206 }
207
208 else if(isalnum((int)in)) {
209 result.push_back(in);
210 }
211
212 // check for illegal characters
213 // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
214 // 0x1f is the unit separator, an invisible character, 0x7f is the ascii control code for delete, not sure about 0x9f
215 // Encode all these as the UTF-8 replacement char ufffd, which is used to replace an unknown or unrepresentable character
216
217 else if ( ( in <= 0x1f && in != '\t' && in != '\n' && in != '\r' ) || ( in >= 0x7f && in <= 0x9f ) ) {
218 result = "&#x" + REPLACEMENT_HEX + ";";
219 // Let's entity encode this instead of returning it
220 //c = REPLACEMENT_CHAR;
221 }
222
223 // all other chars are to be converted to hexadecimal AB, then return the hex entity, which is of the form &#xAB;
224 else {
225 char hex_char[3];
226 sprintf(hex_char,"%02X",in);
227 result = "&#x" + text_t(hex_char) + ";";
228 }
229
230 return result;
231}
232
233// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/CSSCodec.java
234// return the hex and end in whitespace to terminate
235text_t encodeForCSS(const text_t& immuneChars, const unsigned short in) {
236
237 text_t result = "";
238 text_t::const_iterator here = immuneChars.begin();
239 text_t::const_iterator end = immuneChars.end();
240
241 // Check if the character is in the list of chars immune to encoding
242 if(findchar(here, end, in) != end) {
243 result.push_back(in);
244 }
245
246 else if(isalnum((int)in)) {
247 result.push_back(in);
248 }
249
250 // all other chars converted to hexadecimal AB, then return the hex entity, which is of the form &#xAB;
251 else {
252 char hex_char[3];
253 sprintf(hex_char,"%02X",in);
254 // return the hex and end in whitespace to terminate
255 result = "\\" + text_t(hex_char) + " ";
256 }
257
258 return result;
259}
260
261// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/JavaScriptCodec.java
262text_t encodeForJavascript(const text_t& immuneChars, const unsigned short in, bool dmsafe) {
263
264 text_t result = "";
265 text_t::const_iterator start = immuneChars.begin();
266 text_t::const_iterator end = immuneChars.end();
267
268 // Check if the character is in the list of chars immune to encoding
269 if(findchar(start, end, in) != end) {
270 result.push_back(in);
271 }
272
273 else if(isalnum((int)in)) {
274 result.push_back(in);
275 }
276
277 // Do not use these shortcuts as they can be used to break out of a context
278 // if ( ch == 0x00 ) return "\\0";
279 // if ( ch == 0x08 ) return "\\b";
280 // if ( ch == 0x09 ) return "\\t";
281 // if ( ch == 0x0a ) return "\\n";
282 // if ( ch == 0x0b ) return "\\v";
283 // if ( ch == 0x0c ) return "\\f";
284 // if ( ch == 0x0d ) return "\\r";
285 // if ( ch == 0x22 ) return "\\\"";
286 // if ( ch == 0x27 ) return "\\'";
287 // if ( ch == 0x5c ) return "\\\\";
288
289
290 // encode up to 256 with hexadecimal \\xHH, otherwise encode with \\uHHHH
291 else {
292
293 // encode up to 256 with \\xHH
294 if(in < 256) {
295 char hex_char[3];
296 sprintf(hex_char,"%02X",in);
297
298 if(dmsafe) { // double escape backslashes for macro files
299 result = "\\\\x" + text_t(hex_char);
300 } else {
301 result = "\\x" + text_t(hex_char);
302 }
303 }
304 // otherwise encode with \\uHHHH
305 else {
306 char hex_char[5];
307 sprintf(hex_char,"%04X",in);
308 if(dmsafe) { // double escape backslashes for macro files
309 result = "\\\\u" + text_t(hex_char);
310 } else {
311 result = "\\u" + text_t(hex_char);
312 }
313 }
314
315 }
316
317 return result;
318}
319
320
321/*
322http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/MySQLCodec.java
323 Defense Option 3 of https://www.owasp.org/index.php/SQL_Injection_Prevention_Cheat_Sheet
324 which states:
325 "This technique works like this. Each DBMS supports one or more character escaping schemes
326 specific to certain kinds of queries. If you then escape all user supplied input using the
327 proper escaping scheme for the database you are using, the DBMS will not confuse that input
328 with SQL code written by the developer, thus avoiding any possible SQL injection vulnerabilities."
329
330http://www.php.net/manual/en/mysqli.real-escape-string.php
331http://www.php.net/manual/en/function.mysql-real-escape-string.php
332http://www.php.net/manual/en/function.sqlite-escape-string.php
333http://stackoverflow.com/questions/8838913/difference-between-mysql-sqlite-etc-databases
334http://stackoverflow.com/questions/633245/sql-escape-with-sqlite-in-c-sharp
335
336*/
337text_t encodeForMySQL(const text_t& immuneChars, const unsigned short in, const SQLMode mode) {
338
339 text_t result = "";
340 text_t::const_iterator start = immuneChars.begin();
341 text_t::const_iterator end = immuneChars.end();
342
343 // Check if the character is in the list of chars immune to encoding
344 if(findchar(start, end, in) != end) {
345 result.push_back(in);
346 }
347
348 else if(isalnum((int)in)) {
349 result.push_back(in);
350 }
351
352 // switch( mode ) {
353 // case ANSI: return encodeCharacterANSI( c );
354 // case STANDARD: return encodeCharacterMySQL( c );
355 // }
356
357 if(mode == STANDARD) { // encodeCharacterMySQL: Encode a character suitable for MySQL
358
359 if ( in == 0x00 ) result = "\\0";
360 else if ( in == 0x08 ) result = "\\b";
361 else if ( in == 0x09 ) result = "\\t";
362 else if ( in == 0x0a ) result = "\\n";
363 else if ( in == 0x0d ) result = "\\r";
364 else if ( in == 0x1a ) result = "\\Z";
365 else if ( in == 0x22 ) result = "\\\"";
366 else if ( in == 0x25 ) result = "\\%";
367 else if ( in == 0x27 ) result = "\\'";
368 else if ( in == 0x5c ) result = "\\\\";
369 else if ( in == 0x5f ) result = "\\_";
370 else {
371 result = "\\";
372 result.push_back(in);
373 }
374
375 } else { // mode is ANSI, encodeCharacterANSI:
376
377 /* Encode for ANSI SQL.
378 Apostrophe is encoded
379 Bug ###: In ANSI Mode Strings can also be passed in using the quotation.
380 In ANSI_QUOTES mode a quotation is considered to be an identifier, thus
381 cannot be used at all in a value and will be dropped completely.
382 returns a string encoded to standards of MySQL running in ANSI mode
383 */
384
385 if ( in == '\'' ) result = "\'\'";
386 else if ( in == '\"' ) result = "";
387 else result.push_back(in);
388
389 }
390
391 return result;
392}
393
394// See Codec.hex[] initialization and Codec.getHexForNonAlphanumeric(c) and Codec.toHex(c)
395// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/Codec.java
396// See Integer.toHexString()
397// http://docs.oracle.com/javase/6/docs/api/java/lang/Integer.html#toHexString%28int%29
398// http://stackoverflow.com/questions/3370004/what-is-static-block-in-c-or-c
// Returns true if c is an ASCII digit (0 - 9) or an ASCII letter
// (A - Z, a - z), and false for every other value, including all
// values from 0xFF upwards.
bool isAlphaNumeric(const unsigned short c) {
  const bool isDigit = (c >= 0x30 && c <= 0x39);     // 0 - 9
  const bool isUpperCase = (c >= 0x41 && c <= 0x5A); // A - Z
  const bool isLowerCase = (c >= 0x61 && c <= 0x7A); // a - z
  return isDigit || isUpperCase || isLowerCase;
}
Note: See TracBrowser for help on using the repository browser.