root/main/trunk/greenstone2/runtime-src/src/recpt/securitytools.cpp @ 28899

Revision 28899, 15.0 KB (checked in by ak19, 4 years ago)

Third commit for security, for ensuring cgiargs macros are websafe. This time all the changes to the runtime action classes.

Line 
1/**********************************************************************
2 *
3 * securitytools.cpp -- a C++ port of the required functions from the OWASP ESAPI for Java
4 * Copyright (C) 2014  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25/*
26 * https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet
27 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_encoder.html
28 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_default_encoder.html
29 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/_default_encoder_8cpp_source.html
30 *
31 * The OWASP-ESAPI for C++'s online API is outdated/different to the actual method definitions in the
32 * downloaded version of the code at runtime-src/packages/security/installed/include
33 * Further, most of the necessary methods in the ESAPI-for-C++ have not been implemented yet.
34 * The ESAPI-for-C, whose code is more complete, doesn't seem to have the same structure as the Java version.
35 *
36 * As a consequence, this file now contains custom Greenstone C++ functions that port the Java versions of the
37 * required methods from the OWASP for Java API at http://code.google.com/p/owasp-esapi-java/.
38 * In particular the codecs and the DefaultEncoder at
39 * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Fcodecs
40 * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Freference%253Fstate%253Dclosed
41*/
42
43//http://www.cplusplus.com/reference/cctype/isalnum/
44#include <ctype.h>
45#include <stdio.h>
46#include "securitytools.h"
47
48static bool security_on = true; // file-local flag, initialised to true; not read by any function shown in this file
49
50// Forward declaration of the hand-rolled helper defined at the bottom of this file.
50// It is currently unused: the encoders below call isalnum(int) from <ctype.h> instead.
51bool isAlphaNumeric(const unsigned short c);
52
53/* 
54    Rule 5 of the OWASP XSS cheat sheet states:
55    https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet#RULE_.235_-_URL_Escape_Before_Inserting_Untrusted_Data_into_HTML_URL_Parameter_Values
56   
57    WARNING: Do not encode complete or relative URL's with URL encoding! If untrusted input is meant to be placed into
58    href, src or other URL-based attributes, it should be validated to make sure it does not point to an unexpected
59    protocol, especially Javascript links. URL's should then be encoded based on the context of display like any other
60    piece of data. For example, user driven URL's in HREF links should be attribute encoded. For example:
61
62    String userURL = request.getParameter( "userURL" )
63    boolean isValidURL = ESAPI.validator().isValidInput("URLContext", userURL, "URL", 255, false);
64    if (isValidURL) { 
65    <a href="<%=encoder.encodeForHTMLAttribute(userURL)%>">link</a>
66    }
67
68    The following function is simpler than theirs as it only looks for any "javascript:" in the string. If present, it
69    returns false and the string should get URL encoded. Otherwise, the protocol is assumed to be valid and it returns true.
70*/
71bool isValidURLProtocol(const text_t& url) {
72  text_t::const_iterator here = url.begin();
73  text_t::const_iterator end = url.end();
74
75  if(findword(here, end, "javascript:") != end) {
76    return true;
77  }
78  return false;
79}
80
81
82text_t encodeForHTMLAttr(const text_t& in, const text_t& immuneChars) {
83  text_t out;
84  text_t::const_iterator here = in.begin();
85  text_t::const_iterator end = in.end();
86  while (here != end) {
87    out += encodeForHTML(immuneChars, *here); // IMMUNE_HTMLATTR by default
88    ++here;
89  }
90  return out;
91}
92 
93text_t encodeForHTML(const text_t& in, const text_t& immuneChars) {
94  text_t out;
95  text_t::const_iterator here = in.begin();
96  text_t::const_iterator end = in.end();
97  while (here != end) {
98    out += encodeForHTML(immuneChars, *here); // IMMUNE_HTML by default
99    ++here;
100  }
101  return out;
102}
103
104text_t encodeForCSS(const text_t& in, const text_t& immuneChars) {
105  text_t out;
106  text_t::const_iterator here = in.begin();
107  text_t::const_iterator end = in.end();
108  while (here != end) {
109    out += encodeForCSS(immuneChars, *here); // IMMUNE_CSS by default
110    ++here;
111  }
112  return out;
113}
114
115
116text_t encodeForURL(const text_t& in, const text_t& immuneChars) {
117  text_t out;
118  text_t::const_iterator here = in.begin();
119  text_t::const_iterator end = in.end();
120  while (here != end) {
121    out += encodeForURL(immuneChars, *here); // IMMUNE_URL by default
122    ++here;
123  }
124  return out;
125}
126
127text_t encodeForJavascript(const text_t& in, const text_t& immuneChars, bool dmsafe) {
128  text_t out;
129  text_t::const_iterator here = in.begin();
130  text_t::const_iterator end = in.end();
131  while (here != end) {
132    out += encodeForJavascript(immuneChars, *here, dmsafe); // IMMUNE_JAVASCRIPT by default
133    ++here;
134  }
135  return out;
136}
137
138text_t encodeForSQL(const text_t& in, const text_t& immuneChars, const SQLMode mode) {
139  text_t out;
140  text_t::const_iterator here = in.begin();
141  text_t::const_iterator end = in.end();
142  while (here != end) {
143    out += encodeForSQL(immuneChars, *here, mode); // IMMUNE_SQL and STANDARD SQLMode by default
144    ++here;
145  }
146  return out;
147}
148
149
150/*
151The encodeForURL() function here follows the same rules as Java's URLEncoder, since that is called
152by the OWASP-for-Java code when the OWASP project wishes to encode strings for URL contexts:
153http://docs.oracle.com/javase/6/docs/api/java/net/URLEncoder.html
154
155When encoding a String, the following rules apply:
156
157    The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same.
158    The special characters ".", "-", "*", and "_" remain the same.
159    The space character " " is converted into a plus sign "+".
160    All other characters are unsafe and are first converted into one or more bytes using some encoding scheme. Then each byte is represented by the 3-character string "%xy", where xy is the two-digit hexadecimal representation of the byte. The recommended encoding scheme to use is UTF-8. However, for compatibility reasons, if an encoding is not specified, then the default encoding of the platform is used.
161
162*/
163text_t encodeForURL(const text_t& immuneChars, const unsigned short in) {
164 
165  text_t result = "";
166  text_t::const_iterator here = immuneChars.begin();
167  text_t::const_iterator end = immuneChars.end();
168
169  // Check if the character is in the list of chars immune to encoding
170  if(findchar(here, end, in) != end) {
171    result.push_back(in);
172  }
173
174  else if(isalnum((int)in)) {
175    result.push_back(in);
176  }
177
178  // for URLs, space becomes +
179  else if(in == ' ' ) {
180    result.push_back('+');
181  } 
182
183  // all other chars converted to hexadecimal %XY
184  else {
185    char hex_char[4];
186    sprintf(hex_char,"%%%02X",in);
187    result = text_t(hex_char); // result += hex_char;
188  }
189
190  return result;
191}
192
193// encodes for both HTML and HTML attributes.
194// The chars in the immuneChars array determines which of the two this is
195// See http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
196text_t encodeForHTML(const text_t& immuneChars, const unsigned short in) {
197 
198  text_t result = "";
199  text_t::const_iterator here = immuneChars.begin();
200  text_t::const_iterator end = immuneChars.end();
201
202  // Check if the character is in the list of chars immune to encoding
203  if(findchar(here, end, in) != end) {
204    result.push_back(in);
205  }
206
207  else if(isalnum((int)in)) {
208    result.push_back(in);
209  }
210
211  // check for illegal characters
212  // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
213  // 0x1f is the unit separator, an invisible character, 0x7f is the ascii control code for delete, not sure about 0x9f
214  // Encode all these as the UTF-8 replacement char ufffd, which is used to replace an unknown or unrepresentable character
215
216  else if ( ( in <= 0x1f && in != '\t' && in != '\n' && in != '\r' ) || ( in >= 0x7f && in <= 0x9f ) ) {
217    result = "&#x" + REPLACEMENT_HEX + ";";
218    // Let's entity encode this instead of returning it
219    //c = REPLACEMENT_CHAR;
220  } 
221
222  // all other chars are to be converted to hexadecimal AB, then return the hex entity, which is of the form &#xAB;
223  else {
224    char hex_char[3];
225    sprintf(hex_char,"%02X",in);
226    result = "&#x" + text_t(hex_char) + ";";
227  }
228
229  return result;
230}
231
232// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/CSSCodec.java
233// return the hex and end in whitespace to terminate
234text_t encodeForCSS(const text_t& immuneChars, const unsigned short in) {
235 
236  text_t result = "";
237  text_t::const_iterator here = immuneChars.begin();
238  text_t::const_iterator end = immuneChars.end();
239
240  // Check if the character is in the list of chars immune to encoding
241  if(findchar(here, end, in) != end) {
242    result.push_back(in);
243  }
244
245  else if(isalnum((int)in)) {
246    result.push_back(in);
247  }
248
249  // all other chars converted to hexadecimal AB, then return the hex entity, which is of the form &#xAB;
250  else {
251    char hex_char[3];
252    sprintf(hex_char,"%02X",in);   
253    // return the hex and end in whitespace to terminate
254    result = "\\" + text_t(hex_char) + " ";
255  }
256
257  return result;
258}
259
260// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/JavaScriptCodec.java
261text_t encodeForJavascript(const text_t& immuneChars, const unsigned short in, bool dmsafe) {
262 
263  text_t result = "";
264  text_t::const_iterator start = immuneChars.begin();
265  text_t::const_iterator end = immuneChars.end();
266
267  // Check if the character is in the list of chars immune to encoding
268  if(findchar(start, end, in) != end) {
269    result.push_back(in);
270  }
271
272  else if(isalnum((int)in)) {
273    result.push_back(in);
274  }
275
276  // Do not use these shortcuts as they can be used to break out of a context
277  // if ( ch == 0x00 ) return "\\0";
278  // if ( ch == 0x08 ) return "\\b";
279  // if ( ch == 0x09 ) return "\\t";
280  // if ( ch == 0x0a ) return "\\n";
281  // if ( ch == 0x0b ) return "\\v";
282  // if ( ch == 0x0c ) return "\\f";
283  // if ( ch == 0x0d ) return "\\r";
284  // if ( ch == 0x22 ) return "\\\"";
285  // if ( ch == 0x27 ) return "\\'";
286  // if ( ch == 0x5c ) return "\\\\";
287
288
289  // encode up to 256 with hexadecimal \\xHH, otherwise encode with \\uHHHH
290  else {
291
292    // encode up to 256 with \\xHH
293    if(in < 256) {
294      char hex_char[3];
295      sprintf(hex_char,"%02X",in);
296
297      if(dmsafe) { // double escape backslashes for macro files
298    result = "\\\\x" + text_t(hex_char);
299      } else {
300    result = "\\x" + text_t(hex_char);
301      }
302    }
303    // otherwise encode with \\uHHHH
304    else {
305      char hex_char[5];
306      sprintf(hex_char,"%04X",in);
307      if(dmsafe) { // double escape backslashes for macro files
308    result = "\\\\u" + text_t(hex_char);
309      } else {
310    result = "\\u" + text_t(hex_char);     
311      }
312    }
313   
314  }
315
316  return result;
317}
318
319
320/*
321
322 C++ port of OWASP-ESAPI for MySQL. Not sure if this is the same for SQLite
323
324http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/MySQLCodec.java
325 Defense Option 3 of https://www.owasp.org/index.php/SQL_Injection_Prevention_Cheat_Sheet
326 which states:
327 "This technique works like this. Each DBMS supports one or more character escaping schemes
328 specific to certain kinds of queries. If you then escape all user supplied input using the
329 proper escaping scheme for the database you are using, the DBMS will not confuse that input
330 with SQL code written by the developer, thus avoiding any possible SQL injection vulnerabilities."
331
332http://www.php.net/manual/en/mysqli.real-escape-string.php
333http://www.php.net/manual/en/function.mysql-real-escape-string.php
334http://www.php.net/manual/en/function.sqlite-escape-string.php
335http://stackoverflow.com/questions/8838913/difference-between-mysql-sqlite-etc-databases
336http://stackoverflow.com/questions/633245/sql-escape-with-sqlite-in-c-sharp
337
338*/
339text_t encodeForSQL(const text_t& immuneChars, const unsigned short in, const SQLMode mode) {
340 
341  text_t result = "";
342  text_t::const_iterator start = immuneChars.begin();
343  text_t::const_iterator end = immuneChars.end();
344
345  // Check if the character is in the list of chars immune to encoding
346  if(findchar(start, end, in) != end) {
347    result.push_back(in);
348  }
349
350  else if(isalnum((int)in)) {
351    result.push_back(in);
352  }
353
354  // switch( mode ) {
355  // case ANSI: return encodeCharacterANSI( c );
356  // case STANDARD: return encodeCharacterMySQL( c );
357  // }
358
359  if(mode == STANDARD) { // encodeCharacterMySQL: Encode a character suitable for MySQL
360
361    if ( in == 0x00 ) result = "\\0";
362    else if ( in == 0x08 ) result = "\\b";
363    else if ( in == 0x09 ) result = "\\t";
364    else if ( in == 0x0a ) result = "\\n";
365    else if ( in == 0x0d ) result = "\\r";
366    else if ( in == 0x1a ) result = "\\Z";
367    else if ( in == 0x22 ) result = "\\\"";
368    else if ( in == 0x25 ) result = "\\%";
369    else if ( in == 0x27 ) result = "\\'";
370    else if ( in == 0x5c ) result = "\\\\";
371    else if ( in == 0x5f ) result = "\\_";
372    else {
373      result = "\\";
374      result.push_back(in);
375    }
376
377  } else { // mode is ANSI, encodeCharacterANSI:
378
379    /* Encode for ANSI SQL.
380     Apostrophe is encoded
381     Bug ###: In ANSI Mode Strings can also be passed in using the quotation.
382     In ANSI_QUOTES mode a quotation is considered to be an identifier, thus
383     cannot be used at all in a value and will be dropped completely.
384     returns a string encoded to standards of MySQL running in ANSI mode
385    */
386
387    if ( in == '\'' ) result = "\'\'";
388    else if ( in == '\"' ) result = "";
389    else result.push_back(in);
390   
391  }
392
393  return result;
394}
395
396// Unused at present.
397// See Codec.hex[] initialization and Codec.getHexForNonAlphanumeric(c) and Codec.toHex(c)
398// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/Codec.java
399// See Integer.toHexString()
400// http://docs.oracle.com/javase/6/docs/api/java/lang/Integer.html#toHexString%28int%29
401// http://stackoverflow.com/questions/3370004/what-is-static-block-in-c-or-c
// Returns true when c is an ASCII digit (0-9), upper-case letter (A-Z) or
// lower-case letter (a-z), and false for every other 16-bit value.
//
// The original guard `c >= 0xFF` was commented as rejecting values ">= 256".
// It was redundant anyway: all three ranges below end at or before 0x7A, so
// larger values fall through to false without a separate check.
bool isAlphaNumeric(const unsigned short c) {
  const bool isDigit = (c >= 0x30 && c <= 0x39); // '0' - '9'
  const bool isUpper = (c >= 0x41 && c <= 0x5A); // 'A' - 'Z'
  const bool isLower = (c >= 0x61 && c <= 0x7A); // 'a' - 'z'
  return isDigit || isUpper || isLower;
}
Note: See TracBrowser for help on using the browser.