root/main/trunk/greenstone2/runtime-src/src/recpt/securitytools.cpp @ 28898

Revision 28898, 14.9 KB (checked in by ak19, 5 years ago)

1. The cgiargq query variable is now no longer escaped in the 3 simply or large forms that use it. fqv and other js escaped fields are unchanged, since the jssafe now ensures that backslashes are escaped for macro files, so these resolve correctly in query.dm. 2. securitytools.cpp and .h updated to additionally escape back slashes for macro files when javascript escaping. This is done by default, since jssafe variants of cgiargs are all that are used, and they're used in macro files. 3. Encoded versions of decodedcompressedoptions are now used in all macro files. They're always used in attributes, so the attrsafe version which is set in receptionist.cpp is used.

Line 
1/**********************************************************************
2 *
3 * securitytools.cpp -- a C++ port of the required functions from the OWASP ESAPI for Java
4 * Copyright (C) 2014  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25/*
26 * https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet
27 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_encoder.html
28 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_default_encoder.html
29 * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/_default_encoder_8cpp_source.html
30 *
31 * The OWASP-ESAPI for C++'s online API is outdated/different to the actual method definitions in the
32 * downloaded version of the code at runtime-src/packages/security/installed/include
33 * Further, most of the necessary methods in the ESAPI-for-C++ have not been implemented yet.
34 * The ESAPI-for-C, whose code is more complete, doesn't seem to have the same structure as the Java version.
35 *
36 * As a consequence, this file now contains custom Greenstone C++ functions that port the Java versions of the
37 * required methods from the OWASP for Java API at http://code.google.com/p/owasp-esapi-java/.
38 * In particular the codecs and the DefaultEncoder at
39 * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Fcodecs
40 * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Freference%253Fstate%253Dclosed
41*/
42
43//http://www.cplusplus.com/reference/cctype/isalnum/
44#include <ctype.h>
45#include <stdio.h>
46#include "securitytools.h"
47
// File-local flag, initialised to true. No function visible in this file reads it.
static bool security_on = true;

// Prototype for the ASCII alphanumeric test defined at the bottom of this file.
// Currently unused: the encoders call isalnum(int) from <ctype.h> instead.
bool isAlphaNumeric(const unsigned short c);
52
53/* 
54    Rule 5 of the OWASP XSS cheat sheet states:
55    https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet#RULE_.235_-_URL_Escape_Before_Inserting_Untrusted_Data_into_HTML_URL_Parameter_Values
56   
57    WARNING: Do not encode complete or relative URL's with URL encoding! If untrusted input is meant to be placed into
58    href, src or other URL-based attributes, it should be validated to make sure it does not point to an unexpected
59    protocol, especially Javascript links. URL's should then be encoded based on the context of display like any other
60    piece of data. For example, user driven URL's in HREF links should be attribute encoded. For example:
61
62    String userURL = request.getParameter( "userURL" )
63    boolean isValidURL = ESAPI.validator().isValidInput("URLContext", userURL, "URL", 255, false);
64    if (isValidURL) { 
65    <a href="<%=encoder.encodeForHTMLAttribute(userURL)%>">link</a>
66    }
67
68    The following function is simpler than theirs as it only looks for any "javascript:" in the string. If present, it
69    returns false and the string should get URL encoded. Otherwise, the protocol is assumed to be valid and it returns true.
70*/
71bool isValidURLProtocol(const text_t& url) {
72  text_t::const_iterator here = url.begin();
73  text_t::const_iterator end = url.end();
74
75  if(findword(here, end, "javascript:") != end) {
76    return true;
77  }
78  return false;
79}
80
81
82text_t encodeForHTMLAttr(const text_t& in, const text_t& immuneChars) {
83  text_t out;
84  text_t::const_iterator here = in.begin();
85  text_t::const_iterator end = in.end();
86  while (here != end) {
87    out += encodeForHTML(immuneChars, *here); // IMMUNE_HTMLATTR by default
88    ++here;
89  }
90  return out;
91}
92 
93text_t encodeForHTML(const text_t& in, const text_t& immuneChars) {
94  text_t out;
95  text_t::const_iterator here = in.begin();
96  text_t::const_iterator end = in.end();
97  while (here != end) {
98    out += encodeForHTML(immuneChars, *here); // IMMUNE_HTML by default
99    ++here;
100  }
101  return out;
102}
103
104text_t encodeForCSS(const text_t& in, const text_t& immuneChars) {
105  text_t out;
106  text_t::const_iterator here = in.begin();
107  text_t::const_iterator end = in.end();
108  while (here != end) {
109    out += encodeForCSS(immuneChars, *here); // IMMUNE_CSS by default
110    ++here;
111  }
112  return out;
113}
114
115
116text_t encodeForURL(const text_t& in, const text_t& immuneChars) {
117  text_t out;
118  text_t::const_iterator here = in.begin();
119  text_t::const_iterator end = in.end();
120  while (here != end) {
121    out += encodeForURL(immuneChars, *here); // IMMUNE_URL by default
122    ++here;
123  }
124  return out;
125}
126
127text_t encodeForJavascript(const text_t& in, const text_t& immuneChars, bool dmsafe) {
128  text_t out;
129  text_t::const_iterator here = in.begin();
130  text_t::const_iterator end = in.end();
131  while (here != end) {
132    out += encodeForJavascript(immuneChars, *here, dmsafe); // IMMUNE_JAVASCRIPT by default
133    ++here;
134  }
135  return out;
136}
137
138
139text_t encodeForMySQL(const text_t& in, const text_t& immuneChars, const SQLMode mode) {
140  text_t out;
141  text_t::const_iterator here = in.begin();
142  text_t::const_iterator end = in.end();
143  while (here != end) {
144    out += encodeForMySQL(immuneChars, *here, mode); // IMMUNE_SQL and STANDARD SQLMode by default
145    ++here;
146  }
147  return out;
148}
149
150
151/*
The encodeForURL() function here follows the same rules as Java's URLEncoder, since that is called
153by the OWASP-for-Java code when the OWASP project wishes to encode strings for URL contexts:
154http://docs.oracle.com/javase/6/docs/api/java/net/URLEncoder.html
155
156When encoding a String, the following rules apply:
157
158    The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same.
159    The special characters ".", "-", "*", and "_" remain the same.
160    The space character " " is converted into a plus sign "+".
161    All other characters are unsafe and are first converted into one or more bytes using some encoding scheme. Then each byte is represented by the 3-character string "%xy", where xy is the two-digit hexadecimal representation of the byte. The recommended encoding scheme to use is UTF-8. However, for compatibility reasons, if an encoding is not specified, then the default encoding of the platform is used.
162
163*/
164text_t encodeForURL(const text_t& immuneChars, const unsigned short in) {
165 
166  text_t result = "";
167  text_t::const_iterator here = immuneChars.begin();
168  text_t::const_iterator end = immuneChars.end();
169
170  // Check if the character is in the list of chars immune to encoding
171  if(findchar(here, end, in) != end) {
172    result.push_back(in);
173  }
174
175  else if(isalnum((int)in)) {
176    result.push_back(in);
177  }
178
179  // for URLs, space becomes +
180  else if(in == ' ' ) {
181    result.push_back('+');
182  } 
183
184  // all other chars converted to hexadecimal %XY
185  else {
186    char hex_char[4];
187    sprintf(hex_char,"%%%02X",in);
188    result = text_t(hex_char); // result += hex_char;
189  }
190
191  return result;
192}
193
194// encodes for both HTML and HTML attributes.
195// The chars in the immuneChars array determines which of the two this is
196// See http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
197text_t encodeForHTML(const text_t& immuneChars, const unsigned short in) {
198 
199  text_t result = "";
200  text_t::const_iterator here = immuneChars.begin();
201  text_t::const_iterator end = immuneChars.end();
202
203  // Check if the character is in the list of chars immune to encoding
204  if(findchar(here, end, in) != end) {
205    result.push_back(in);
206  }
207
208  else if(isalnum((int)in)) {
209    result.push_back(in);
210  }
211
212  // check for illegal characters
213  // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java
214  // 0x1f is the unit separator, an invisible character, 0x7f is the ascii control code for delete, not sure about 0x9f
215  // Encode all these as the UTF-8 replacement char ufffd, which is used to replace an unknown or unrepresentable character
216
217  else if ( ( in <= 0x1f && in != '\t' && in != '\n' && in != '\r' ) || ( in >= 0x7f && in <= 0x9f ) ) {
218    result = "&#x" + REPLACEMENT_HEX + ";";
219    // Let's entity encode this instead of returning it
220    //c = REPLACEMENT_CHAR;
221  } 
222
223  // all other chars are to be converted to hexadecimal AB, then return the hex entity, which is of the form &#xAB;
224  else {
225    char hex_char[3];
226    sprintf(hex_char,"%02X",in);
227    result = "&#x" + text_t(hex_char) + ";";
228  }
229
230  return result;
231}
232
233// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/CSSCodec.java
234// return the hex and end in whitespace to terminate
235text_t encodeForCSS(const text_t& immuneChars, const unsigned short in) {
236 
237  text_t result = "";
238  text_t::const_iterator here = immuneChars.begin();
239  text_t::const_iterator end = immuneChars.end();
240
241  // Check if the character is in the list of chars immune to encoding
242  if(findchar(here, end, in) != end) {
243    result.push_back(in);
244  }
245
246  else if(isalnum((int)in)) {
247    result.push_back(in);
248  }
249
250  // all other chars converted to hexadecimal AB, then return the hex entity, which is of the form &#xAB;
251  else {
252    char hex_char[3];
253    sprintf(hex_char,"%02X",in);   
254    // return the hex and end in whitespace to terminate
255    result = "\\" + text_t(hex_char) + " ";
256  }
257
258  return result;
259}
260
261// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/JavaScriptCodec.java
262text_t encodeForJavascript(const text_t& immuneChars, const unsigned short in, bool dmsafe) {
263 
264  text_t result = "";
265  text_t::const_iterator start = immuneChars.begin();
266  text_t::const_iterator end = immuneChars.end();
267
268  // Check if the character is in the list of chars immune to encoding
269  if(findchar(start, end, in) != end) {
270    result.push_back(in);
271  }
272
273  else if(isalnum((int)in)) {
274    result.push_back(in);
275  }
276
277  // Do not use these shortcuts as they can be used to break out of a context
278  // if ( ch == 0x00 ) return "\\0";
279  // if ( ch == 0x08 ) return "\\b";
280  // if ( ch == 0x09 ) return "\\t";
281  // if ( ch == 0x0a ) return "\\n";
282  // if ( ch == 0x0b ) return "\\v";
283  // if ( ch == 0x0c ) return "\\f";
284  // if ( ch == 0x0d ) return "\\r";
285  // if ( ch == 0x22 ) return "\\\"";
286  // if ( ch == 0x27 ) return "\\'";
287  // if ( ch == 0x5c ) return "\\\\";
288
289
290  // encode up to 256 with hexadecimal \\xHH, otherwise encode with \\uHHHH
291  else {
292
293    // encode up to 256 with \\xHH
294    if(in < 256) {
295      char hex_char[3];
296      sprintf(hex_char,"%02X",in);
297
298      if(dmsafe) { // double escape backslashes for macro files
299    result = "\\\\x" + text_t(hex_char);
300      } else {
301    result = "\\x" + text_t(hex_char);
302      }
303    }
304    // otherwise encode with \\uHHHH
305    else {
306      char hex_char[5];
307      sprintf(hex_char,"%04X",in);
308      if(dmsafe) { // double escape backslashes for macro files
309    result = "\\\\u" + text_t(hex_char);
310      } else {
311    result = "\\u" + text_t(hex_char);     
312      }
313    }
314   
315  }
316
317  return result;
318}
319
320
321/*
322http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/MySQLCodec.java
323 Defense Option 3 of https://www.owasp.org/index.php/SQL_Injection_Prevention_Cheat_Sheet
324 which states:
325 "This technique works like this. Each DBMS supports one or more character escaping schemes
326 specific to certain kinds of queries. If you then escape all user supplied input using the
327 proper escaping scheme for the database you are using, the DBMS will not confuse that input
328 with SQL code written by the developer, thus avoiding any possible SQL injection vulnerabilities."
329
330http://www.php.net/manual/en/mysqli.real-escape-string.php
331http://www.php.net/manual/en/function.mysql-real-escape-string.php
332http://www.php.net/manual/en/function.sqlite-escape-string.php
333http://stackoverflow.com/questions/8838913/difference-between-mysql-sqlite-etc-databases
334http://stackoverflow.com/questions/633245/sql-escape-with-sqlite-in-c-sharp
335
336*/
337text_t encodeForMySQL(const text_t& immuneChars, const unsigned short in, const SQLMode mode) {
338 
339  text_t result = "";
340  text_t::const_iterator start = immuneChars.begin();
341  text_t::const_iterator end = immuneChars.end();
342
343  // Check if the character is in the list of chars immune to encoding
344  if(findchar(start, end, in) != end) {
345    result.push_back(in);
346  }
347
348  else if(isalnum((int)in)) {
349    result.push_back(in);
350  }
351
352  // switch( mode ) {
353  // case ANSI: return encodeCharacterANSI( c );
354  // case STANDARD: return encodeCharacterMySQL( c );
355  // }
356
357  if(mode == STANDARD) { // encodeCharacterMySQL: Encode a character suitable for MySQL
358
359    if ( in == 0x00 ) result = "\\0";
360    else if ( in == 0x08 ) result = "\\b";
361    else if ( in == 0x09 ) result = "\\t";
362    else if ( in == 0x0a ) result = "\\n";
363    else if ( in == 0x0d ) result = "\\r";
364    else if ( in == 0x1a ) result = "\\Z";
365    else if ( in == 0x22 ) result = "\\\"";
366    else if ( in == 0x25 ) result = "\\%";
367    else if ( in == 0x27 ) result = "\\'";
368    else if ( in == 0x5c ) result = "\\\\";
369    else if ( in == 0x5f ) result = "\\_";
370    else {
371      result = "\\";
372      result.push_back(in);
373    }
374
375  } else { // mode is ANSI, encodeCharacterANSI:
376
377    /* Encode for ANSI SQL.
378     Apostrophe is encoded
379     Bug ###: In ANSI Mode Strings can also be passed in using the quotation.
380     In ANSI_QUOTES mode a quotation is considered to be an identifier, thus
381     cannot be used at all in a value and will be dropped completely.
382     returns a string encoded to standards of MySQL running in ANSI mode
383    */
384
385    if ( in == '\'' ) result = "\'\'";
386    else if ( in == '\"' ) result = "";
387    else result.push_back(in);
388   
389  }
390
391  return result;
392}
393
394// See Codec.hex[] initialization and Codec.getHexForNonAlphanumeric(c) and Codec.toHex(c)
395// http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/Codec.java
396// See Integer.toHexString()
397// http://docs.oracle.com/javase/6/docs/api/java/lang/Integer.html#toHexString%28int%29
398// http://stackoverflow.com/questions/3370004/what-is-static-block-in-c-or-c
// Reports whether c is an ASCII digit (0x30-0x39), upper-case letter
// (0x41-0x5A) or lower-case letter (0x61-0x7A). Every other value, including
// everything from 0xFF upwards, yields false.
bool isAlphaNumeric(const unsigned short c) {
  const bool is_digit = (c >= 0x30) && (c <= 0x39);
  const bool is_upper = (c >= 0x41) && (c <= 0x5A);
  const bool is_lower = (c >= 0x61) && (c <= 0x7A);
  return is_digit || is_upper || is_lower;
}
Note: See TracBrowser for help on using the browser.