source: main/trunk/greenstone2/runtime-src/src/recpt/securitytools.cpp@ 28888

Last change on this file since 28888 was 28888, checked in by ak19, 10 years ago

First security commit. 1. Introducing the new securitytools.h and .cpp files, which port the functions necessary to implement security in Greenstone from OWASP-ESAPI for Java, since OWASP's C++ version is largely not yet implemented, even though their code compiles. The newly added runtime-src/packages/security which contains OWASP ESAPI for C++ will therefore be removed again shortly. 2. receptionist.cpp now sets various web-encoded variants for each cgiarg macro, such as HTML entity encoded, attr encoded, javascript encoded (and css encoded variants). These are now used in the macro files based on which variant is suited to the context. 3. This commit further contains the minimum changes to protect the c, d, and p cgi variables.

File size: 14.6 KB
Line 
1/**********************************************************************
2 *
3 * securitytools.cpp -- a C++ port of the required functions from the OWASP ESAPI for Java
4 * Copyright (C) 2014 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25/*
26 * https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet
27 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_encoder.html
28 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_default_encoder.html
29 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/_default_encoder_8cpp_source.html
30 *
31 * The OWASP-ESAPI for C++'s online API is outdated/different to the actual method definitions in the
32 * downloaded version of the code at runtime-src/packages/security/installed/include
33 * Further, most of the necessary methods in the ESAPI-for-C++ have not been implemented yet.
34 * The ESAPI-for-C, whose code is more complete, doesn't seem to have the same structure as the Java version.
35 *
36 * As a consequence, this file now contains custom Greenstone C++ functions that port the Java versions of the
37 * required methods from the OWASP for Java API at http://code.google.com/p/owasp-esapi-java/.
38 * In particular the codecs and the DefaultEncoder at
39 * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Fcodecs
40 * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Freference%253Fstate%253Dclosed
41*/
42
43//http://www.cplusplus.com/reference/cctype/isalnum/
44#include <ctype.h>
45#include <stdio.h>
46#include "securitytools.h"
47
// Module-level switch, initialised to true.
// NOTE(review): nothing in the visible part of this file reads or writes it -- confirm whether it is still needed.
static bool security_on = true;

// Function prototype for the ASCII-only alphanumeric test defined at the bottom of this file.
// Currently unused: the per-character encoders call isalnum(int) from <ctype.h> instead.
bool isAlphaNumeric(const unsigned short c);
52
53/*
54 Rule 5 of the OWASP XSS cheat sheet states:
55 https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet#RULE_.235_-_URL_Escape_Before_Inserting_Untrusted_Data_into_HTML_URL_Parameter_Values
56
57 WARNING: Do not encode complete or relative URL's with URL encoding! If untrusted input is meant to be placed into
58 href, src or other URL-based attributes, it should be validated to make sure it does not point to an unexpected
59 protocol, especially Javascript links. URL's should then be encoded based on the context of display like any other
60 piece of data. For example, user driven URL's in HREF links should be attribute encoded. For example:
61
62 String userURL = request.getParameter( "userURL" )
63 boolean isValidURL = ESAPI.validator().isValidInput("URLContext", userURL, "URL", 255, false);
64 if (isValidURL) {
65 <a href="<%=encoder.encodeForHTMLAttribute(userURL)%>">link</a>
66 }
67
68 The following function is simpler than theirs as it only looks for any "javascript:" in the string. If present, it
69 returns false and the string should get URL encoded. Otherwise, the protocol is assumed to be valid and it returns true.
70*/
71bool isValidURLProtocol(const text_t& url) {
72 text_t::const_iterator here = url.begin();
73 text_t::const_iterator end = url.end();
74
75 if(findword(here, end, "javascript:") != end) {
76 return true;
77 }
78 return false;
79}
80
81
82text_t encodeForHTMLAttr(const text_t& in, const text_t& immuneChars) {
83 text_t out;
84 text_t::const_iterator here = in.begin();
85 text_t::const_iterator end = in.end();
86 while (here != end) {
87 out += encodeForHTML(immuneChars, *here); // IMMUNE_HTMLATTR by default
88 ++here;
89 }
90 return out;
91}
92
93text_t encodeForHTML(const text_t& in, const text_t& immuneChars) {
94 text_t out;
95 text_t::const_iterator here = in.begin();
96 text_t::const_iterator end = in.end();
97 while (here != end) {
98 out += encodeForHTML(immuneChars, *here); // IMMUNE_HTML by default
99 ++here;
100 }
101 return out;
102}
103
104text_t encodeForCSS(const text_t& in, const text_t& immuneChars) {
105 text_t out;
106 text_t::const_iterator here = in.begin();
107 text_t::const_iterator end = in.end();
108 while (here != end) {
109 out += encodeForCSS(immuneChars, *here); // IMMUNE_CSS by default
110 ++here;
111 }
112 return out;
113}
114
115
116text_t encodeForURL(const text_t& in, const text_t& immuneChars) {
117 text_t out;
118 text_t::const_iterator here = in.begin();
119 text_t::const_iterator end = in.end();
120 while (here != end) {
121 out += encodeForURL(immuneChars, *here); // IMMUNE_URL by default
122 ++here;
123 }
124 return out;
125}
126
127text_t encodeForJavascript(const text_t& in, const text_t& immuneChars) {
128 text_t out;
129 text_t::const_iterator here = in.begin();
130 text_t::const_iterator end = in.end();
131 while (here != end) {
132 out += encodeForJavascript(immuneChars, *here); // IMMUNE_JAVASCRIPT by default
133 ++here;
134 }
135 return out;
136}
137
138text_t encodeForMySQL(const text_t& in, const text_t& immuneChars, const SQLMode mode) {
139 text_t out;
140 text_t::const_iterator here = in.begin();
141 text_t::const_iterator end = in.end();
142 while (here != end) {
143 out += encodeForMySQL(immuneChars, *here, mode); // IMMUNE_SQL and STANDARD SQLMode by default
144 ++here;
145 }
146 return out;
147}
148
149
150/*
151The encodeForURL() here function follows the same rules as Java's URLEncoder, since that is called
152by the OWASP-for-Java code when the OWASP project wishes to encode strings for URL contexts:
153http://docs.oracle.com/javase/6/docs/api/java/net/URLEncoder.html
154
155When encoding a String, the following rules apply:
156
157 The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same.
158 The special characters ".", "-", "*", and "_" remain the same.
159 The space character " " is converted into a plus sign "+".
160 All other characters are unsafe and are first converted into one or more bytes using some encoding scheme. Then each byte is represented by the 3-character string "%xy", where xy is the two-digit hexadecimal representation of the byte. The recommended encoding scheme to use is UTF-8. However, for compatibility reasons, if an encoding is not specified, then the default encoding of the platform is used.
161
162*/
163text_t encodeForURL(const text_t& immuneChars, const unsigned short in) {
164
165 text_t result = "";
166 text_t::const_iterator here = immuneChars.begin();
167 text_t::const_iterator end = immuneChars.end();
168
169 // Check if the character is in the list of chars immune to encoding
170 if(findchar(here, end, in) != end) {
171 result.push_back(in);
172 }
173
174 else if(isalnum((int)in)) {
175 result.push_back(in);
176 }
177
178 // for URLs, space becomes +
179 else if(in == ' ' ) {
180 result.push_back('+');
181 }
182
183 // all other chars converted to hexadecimal %XY
184 else {
185 char hex_char[4];
186 sprintf(hex_char,"%%%02X",in);
187 result = text_t(hex_char); // result += hex_char;
188 }
189
190 return result;
191}
192
193// encodes for both HTML and HTML attributes.
194// The chars in the immuneChars array determines which of the two this is
195// See http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
196text_t encodeForHTML(const text_t& immuneChars, const unsigned short in) {
197
198 text_t result = "";
199 text_t::const_iterator here = immuneChars.begin();
200 text_t::const_iterator end = immuneChars.end();
201
202 // Check if the character is in the list of chars immune to encoding
203 if(findchar(here, end, in) != end) {
204 result.push_back(in);
205 }
206
207 else if(isalnum((int)in)) {
208 result.push_back(in);
209 }
210
211 // check for illegal characters
212 // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
213 // 0x1f is the unit separator, an invisible character, 0x7f is the ascii control code for delete, not sure about 0x9f
214 // Encode all these as the UTF-8 replacement char ufffd, which is used to replace an unknown or unrepresentable character
215
216 else if ( ( in <= 0x1f && in != '\t' && in != '\n' && in != '\r' ) || ( in >= 0x7f && in <= 0x9f ) ) {
217 result = "&#x" + REPLACEMENT_HEX + ";";
218 // Let's entity encode this instead of returning it
219 //c = REPLACEMENT_CHAR;
220 }
221
222 // all other chars are to be converted to hexadecimal AB, then return the hex entity, which is of the form &#xAB;
223 else {
224 char hex_char[3];
225 sprintf(hex_char,"%02X",in);
226 result = "&#x" + text_t(hex_char) + ";";
227 }
228
229 return result;
230}
231
232// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/CSSCodec.java
233// return the hex and end in whitespace to terminate
234text_t encodeForCSS(const text_t& immuneChars, const unsigned short in) {
235
236 text_t result = "";
237 text_t::const_iterator here = immuneChars.begin();
238 text_t::const_iterator end = immuneChars.end();
239
240 // Check if the character is in the list of chars immune to encoding
241 if(findchar(here, end, in) != end) {
242 result.push_back(in);
243 }
244
245 else if(isalnum((int)in)) {
246 result.push_back(in);
247 }
248
249 // all other chars converted to hexadecimal AB, then return the hex entity, which is of the form &#xAB;
250 else {
251 char hex_char[3];
252 sprintf(hex_char,"%02X",in);
253 // return the hex and end in whitespace to terminate
254 result = "\\" + text_t(hex_char) + " ";
255 }
256
257 return result;
258}
259
260// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/JavaScriptCodec.java
261text_t encodeForJavascript(const text_t& immuneChars, const unsigned short in) {
262
263 text_t result = "";
264 text_t::const_iterator start = immuneChars.begin();
265 text_t::const_iterator end = immuneChars.end();
266
267 // Check if the character is in the list of chars immune to encoding
268 if(findchar(start, end, in) != end) {
269 result.push_back(in);
270 }
271
272 else if(isalnum((int)in)) {
273 result.push_back(in);
274 }
275
276 // Do not use these shortcuts as they can be used to break out of a context
277 // if ( ch == 0x00 ) return "\\0";
278 // if ( ch == 0x08 ) return "\\b";
279 // if ( ch == 0x09 ) return "\\t";
280 // if ( ch == 0x0a ) return "\\n";
281 // if ( ch == 0x0b ) return "\\v";
282 // if ( ch == 0x0c ) return "\\f";
283 // if ( ch == 0x0d ) return "\\r";
284 // if ( ch == 0x22 ) return "\\\"";
285 // if ( ch == 0x27 ) return "\\'";
286 // if ( ch == 0x5c ) return "\\\\";
287
288
289 // encode up to 256 with hexadecimal \\xHH, otherwise encode with \\uHHHH
290 else {
291
292 // encode up to 256 with \\xHH
293 if(in < 256) {
294 char hex_char[3];
295 sprintf(hex_char,"%02X",in);
296 result = "\\x" + text_t(hex_char);
297 }
298 // otherwise encode with \\uHHHH
299 else {
300 char hex_char[5];
301 sprintf(hex_char,"%04X",in);
302 result = "\\u" + text_t(hex_char);
303 }
304
305 }
306
307 return result;
308}
309
310
311/*
312http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/MySQLCodec.java
313 Defense Option 3 of https://www.owasp.org/index.php/SQL_Injection_Prevention_Cheat_Sheet
314 which states:
315 "This technique works like this. Each DBMS supports one or more character escaping schemes
316 specific to certain kinds of queries. If you then escape all user supplied input using the
317 proper escaping scheme for the database you are using, the DBMS will not confuse that input
318 with SQL code written by the developer, thus avoiding any possible SQL injection vulnerabilities."
319
320http://www.php.net/manual/en/mysqli.real-escape-string.php
321http://www.php.net/manual/en/function.mysql-real-escape-string.php
322http://www.php.net/manual/en/function.sqlite-escape-string.php
323http://stackoverflow.com/questions/8838913/difference-between-mysql-sqlite-etc-databases
324http://stackoverflow.com/questions/633245/sql-escape-with-sqlite-in-c-sharp
325
326*/
327text_t encodeForMySQL(const text_t& immuneChars, const unsigned short in, const SQLMode mode) {
328
329 text_t result = "";
330 text_t::const_iterator start = immuneChars.begin();
331 text_t::const_iterator end = immuneChars.end();
332
333 // Check if the character is in the list of chars immune to encoding
334 if(findchar(start, end, in) != end) {
335 result.push_back(in);
336 }
337
338 else if(isalnum((int)in)) {
339 result.push_back(in);
340 }
341
342 // switch( mode ) {
343 // case ANSI: return encodeCharacterANSI( c );
344 // case STANDARD: return encodeCharacterMySQL( c );
345 // }
346
347 if(mode == STANDARD) { // encodeCharacterMySQL: Encode a character suitable for MySQL
348
349 if ( in == 0x00 ) result = "\\0";
350 else if ( in == 0x08 ) result = "\\b";
351 else if ( in == 0x09 ) result = "\\t";
352 else if ( in == 0x0a ) result = "\\n";
353 else if ( in == 0x0d ) result = "\\r";
354 else if ( in == 0x1a ) result = "\\Z";
355 else if ( in == 0x22 ) result = "\\\"";
356 else if ( in == 0x25 ) result = "\\%";
357 else if ( in == 0x27 ) result = "\\'";
358 else if ( in == 0x5c ) result = "\\\\";
359 else if ( in == 0x5f ) result = "\\_";
360 else {
361 result = "\\";
362 result.push_back(in);
363 }
364
365 } else { // mode is ANSI, encodeCharacterANSI:
366
367 /* Encode for ANSI SQL.
368 Apostrophe is encoded
369 Bug ###: In ANSI Mode Strings can also be passed in using the quotation.
370 In ANSI_QUOTES mode a quotation is considered to be an identifier, thus
371 cannot be used at all in a value and will be dropped completely.
372 returns a string encoded to standards of MySQL running in ANSI mode
373 */
374
375 if ( in == '\'' ) result = "\'\'";
376 else if ( in == '\"' ) result = "";
377 else result.push_back(in);
378
379 }
380
381 return result;
382}
383
384// See Codec.hex[] initialization and Codec.getHexForNonAlphanumeric(c) and Codec.toHex(c)
385// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/Codec.java
386// See Integer.toHexString()
387// http://docs.oracle.com/javase/6/docs/api/java/lang/Integer.html#toHexString%28int%29
388// http://stackoverflow.com/questions/3370004/what-is-static-block-in-c-or-c
// Returns true only for the ASCII digits 0-9 and the ASCII letters A-Z and a-z.
// Every other value, including everything at or above 0xFF, yields false.
// The test is purely numeric, so it does not depend on the current locale.
bool isAlphaNumeric(const unsigned short c) {
  const bool isDigit = (c >= 0x30) && (c <= 0x39); // '0' - '9'
  const bool isUpper = (c >= 0x41) && (c <= 0x5A); // 'A' - 'Z'
  const bool isLower = (c >= 0x61) && (c <= 0x7A); // 'a' - 'z'

  return isDigit || isUpper || isLower;
}
Note: See TracBrowser for help on using the repository browser.