/********************************************************************** * * securitytools.cpp -- a C++ port of the required functions from the OWASP ESAPI for Java * Copyright (C) 2014 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ /* * https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_encoder.html * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_default_encoder.html * http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/_default_encoder_8cpp_source.html * * The OWASP-ESAPI for C++'s online API is outdated/different to the actual method definitions in the * downloaded version of the code at runtime-src/packages/security/installed/include * Further, most of the necessary methods in the ESAPI-for-C++ have not been implemented yet. * The ESAPI-for-C, whose code is more complete, doesn't seem to have the same structure as the Java version. * * As a consequence, this file now contains custom Greenstone C++ functions that port the Java versions of the * required methods from the OWASP for Java API at http://code.google.com/p/owasp-esapi-java/. * In particular the codecs and the DefaultEncoder at * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Fcodecs * http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/#esapi%2Freference%253Fstate%253Dclosed */ //http://www.cplusplus.com/reference/cctype/isalnum/ #include #include #include "securitytools.h" static bool security_on = true; // function prototype - unused. Using ctype::isalphanum(int) instead bool isAlphaNumeric(const unsigned short c); /* Rule 5 of the OWASP XSS cheat sheet states: https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet#RULE_.235_-_URL_Escape_Before_Inserting_Untrusted_Data_into_HTML_URL_Parameter_Values WARNING: Do not encode complete or relative URL's with URL encoding! If untrusted input is meant to be placed into href, src or other URL-based attributes, it should be validated to make sure it does not point to an unexpected protocol, especially Javascript links. URL's should then be encoded based on the context of display like any other piece of data. For example, user driven URL's in HREF links should be attribute encoded. For example: String userURL = request.getParameter( "userURL" ) boolean isValidURL = ESAPI.validator().isValidInput("URLContext", userURL, "URL", 255, false); if (isValidURL) { link } The following function is simpler than theirs as it only looks for any "javascript:" in the string. If present, it returns false and the string should get URL encoded. Otherwise, the protocol is assumed to be valid and it returns true. */ bool isValidURLProtocol(const text_t& url) { text_t::const_iterator here = url.begin(); text_t::const_iterator end = url.end(); if(findword(here, end, "javascript:") != end) { return true; } return false; } text_t encodeForHTMLAttr(const text_t& in, const text_t& immuneChars) { text_t out; text_t::const_iterator here = in.begin(); text_t::const_iterator end = in.end(); while (here != end) { out += encodeForHTML(immuneChars, *here); // IMMUNE_HTMLATTR by default ++here; } return out; } text_t encodeForHTML(const text_t& in, const text_t& immuneChars) { text_t out; text_t::const_iterator here = in.begin(); text_t::const_iterator end = in.end(); while (here != end) { out += encodeForHTML(immuneChars, *here); // IMMUNE_HTML by default ++here; } return out; } text_t encodeForCSS(const text_t& in, const text_t& immuneChars) { text_t out; text_t::const_iterator here = in.begin(); text_t::const_iterator end = in.end(); while (here != end) { out += encodeForCSS(immuneChars, *here); // IMMUNE_CSS by default ++here; } return out; } text_t encodeForURL(const text_t& in, const text_t& immuneChars) { text_t out; text_t::const_iterator here = in.begin(); text_t::const_iterator end = in.end(); while (here != end) { out += encodeForURL(immuneChars, *here); // IMMUNE_URL by default ++here; } return out; } text_t encodeForJavascript(const text_t& in, const text_t& immuneChars, bool dmsafe) { text_t out; text_t::const_iterator here = in.begin(); text_t::const_iterator end = in.end(); while (here != end) { out += encodeForJavascript(immuneChars, *here, dmsafe); // IMMUNE_JAVASCRIPT by default ++here; } return out; } text_t encodeForSQL(const text_t& in, const text_t& immuneChars, const SQLMode mode) { text_t out; text_t::const_iterator here = in.begin(); text_t::const_iterator end = in.end(); while (here != end) { out += encodeForSQL(immuneChars, *here, mode); // IMMUNE_SQL and STANDARD SQLMode by default ++here; } return out; } /* The encodeForURL() here function follows the same rules as Java's URLEncoder, since that is called by the OWASP-for-Java code when the OWASP project wishes to encode strings for URL contexts: http://docs.oracle.com/javase/6/docs/api/java/net/URLEncoder.html When encoding a String, the following rules apply: The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same. The special characters ".", "-", "*", and "_" remain the same. The space character " " is converted into a plus sign "+". All other characters are unsafe and are first converted into one or more bytes using some encoding scheme. Then each byte is represented by the 3-character string "%xy", where xy is the two-digit hexadecimal representation of the byte. The recommended encoding scheme to use is UTF-8. However, for compatibility reasons, if an encoding is not specified, then the default encoding of the platform is used. */ text_t encodeForURL(const text_t& immuneChars, const unsigned short in) { text_t result = ""; text_t::const_iterator here = immuneChars.begin(); text_t::const_iterator end = immuneChars.end(); // Check if the character is in the list of chars immune to encoding if(findchar(here, end, in) != end) { result.push_back(in); } else if(isalnum((int)in)) { result.push_back(in); } // for URLs, space becomes + else if(in == ' ' ) { result.push_back('+'); } // all other chars converted to hexadecimal %XY else { char hex_char[4]; sprintf(hex_char,"%%%02X",in); result = text_t(hex_char); // result += hex_char; } return result; } // encodes for both HTML and HTML attributes. // The chars in the immuneChars array determines which of the two this is // See http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java text_t encodeForHTML(const text_t& immuneChars, const unsigned short in) { text_t result = ""; text_t::const_iterator here = immuneChars.begin(); text_t::const_iterator end = immuneChars.end(); // Check if the character is in the list of chars immune to encoding if(findchar(here, end, in) != end) { result.push_back(in); } else if(isalnum((int)in)) { result.push_back(in); } // check for illegal characters // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/HTMLEntityCodec.java // 0x1f is the unit separator, an invisible character, 0x7f is the ascii control code for delete, not sure about 0x9f // Encode all these as the UTF-8 replacement char ufffd, which is used to replace an unknown or unrepresentable character else if ( ( in <= 0x1f && in != '\t' && in != '\n' && in != '\r' ) || ( in >= 0x7f && in <= 0x9f ) ) { result = "&#x" + REPLACEMENT_HEX + ";"; // Let's entity encode this instead of returning it //c = REPLACEMENT_CHAR; } // all other chars are to be converted to hexadecimal AB, then return the hex entity, which is of the form « else { char hex_char[3]; sprintf(hex_char,"%02X",in); result = "&#x" + text_t(hex_char) + ";"; } return result; } // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/CSSCodec.java // return the hex and end in whitespace to terminate text_t encodeForCSS(const text_t& immuneChars, const unsigned short in) { text_t result = ""; text_t::const_iterator here = immuneChars.begin(); text_t::const_iterator end = immuneChars.end(); // Check if the character is in the list of chars immune to encoding if(findchar(here, end, in) != end) { result.push_back(in); } else if(isalnum((int)in)) { result.push_back(in); } // all other chars converted to hexadecimal AB, then return the hex entity, which is of the form « else { char hex_char[3]; sprintf(hex_char,"%02X",in); // return the hex and end in whitespace to terminate result = "\\" + text_t(hex_char) + " "; } return result; } // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/JavaScriptCodec.java text_t encodeForJavascript(const text_t& immuneChars, const unsigned short in, bool dmsafe) { text_t result = ""; text_t::const_iterator start = immuneChars.begin(); text_t::const_iterator end = immuneChars.end(); // Check if the character is in the list of chars immune to encoding if(findchar(start, end, in) != end) { result.push_back(in); } else if(isalnum((int)in)) { result.push_back(in); } // Do not use these shortcuts as they can be used to break out of a context // if ( ch == 0x00 ) return "\\0"; // if ( ch == 0x08 ) return "\\b"; // if ( ch == 0x09 ) return "\\t"; // if ( ch == 0x0a ) return "\\n"; // if ( ch == 0x0b ) return "\\v"; // if ( ch == 0x0c ) return "\\f"; // if ( ch == 0x0d ) return "\\r"; // if ( ch == 0x22 ) return "\\\""; // if ( ch == 0x27 ) return "\\'"; // if ( ch == 0x5c ) return "\\\\"; // encode up to 256 with hexadecimal \\xHH, otherwise encode with \\uHHHH else { // encode up to 256 with \\xHH if(in < 256) { char hex_char[3]; sprintf(hex_char,"%02X",in); if(dmsafe) { // double escape backslashes for macro files result = "\\\\x" + text_t(hex_char); } else { result = "\\x" + text_t(hex_char); } } // otherwise encode with \\uHHHH else { char hex_char[5]; sprintf(hex_char,"%04X",in); if(dmsafe) { // double escape backslashes for macro files result = "\\\\u" + text_t(hex_char); } else { result = "\\u" + text_t(hex_char); } } } return result; } /* C++ port of OWASP-ESAPI for MySQL. Not sure if this is is the same for SQLite http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/MySQLCodec.java Defense Option 3 of https://www.owasp.org/index.php/SQL_Injection_Prevention_Cheat_Sheet which states: "This technique works like this. Each DBMS supports one or more character escaping schemes specific to certain kinds of queries. If you then escape all user supplied input using the proper escaping scheme for the database you are using, the DBMS will not confuse that input with SQL code written by the developer, thus avoiding any possible SQL injection vulnerabilities." http://www.php.net/manual/en/mysqli.real-escape-string.php http://www.php.net/manual/en/function.mysql-real-escape-string.php http://www.php.net/manual/en/function.sqlite-escape-string.php http://stackoverflow.com/questions/8838913/difference-between-mysql-sqlite-etc-databases http://stackoverflow.com/questions/633245/sql-escape-with-sqlite-in-c-sharp */ text_t encodeForSQL(const text_t& immuneChars, const unsigned short in, const SQLMode mode) { text_t result = ""; text_t::const_iterator start = immuneChars.begin(); text_t::const_iterator end = immuneChars.end(); // Check if the character is in the list of chars immune to encoding if(findchar(start, end, in) != end) { result.push_back(in); } else if(isalnum((int)in)) { result.push_back(in); } // switch( mode ) { // case ANSI: return encodeCharacterANSI( c ); // case STANDARD: return encodeCharacterMySQL( c ); // } if(mode == STANDARD) { // encodeCharacterMySQL: Encode a character suitable for MySQL if ( in == 0x00 ) result = "\\0"; else if ( in == 0x08 ) result = "\\b"; else if ( in == 0x09 ) result = "\\t"; else if ( in == 0x0a ) result = "\\n"; else if ( in == 0x0d ) result = "\\r"; else if ( in == 0x1a ) result = "\\Z"; else if ( in == 0x22 ) result = "\\\""; else if ( in == 0x25 ) result = "\\%"; else if ( in == 0x27 ) result = "\\'"; else if ( in == 0x5c ) result = "\\\\"; else if ( in == 0x5f ) result = "\\_"; else { result = "\\"; result.push_back(in); } } else { // mode is ANSI, encodeCharacterANSI: /* Encode for ANSI SQL. Apostrophe is encoded Bug ###: In ANSI Mode Strings can also be passed in using the quotation. In ANSI_QUOTES mode a quotation is considered to be an identifier, thus cannot be used at all in a value and will be dropped completely. returns a string encoded to standards of MySQL running in ANSI mode */ if ( in == '\'' ) result = "\'\'"; else if ( in == '\"' ) result = ""; else result.push_back(in); } return result; } // Unused at present. // See Codec.hex[] initialization and Codec.getHexForNonAlphanumeric(c) and Codec.toHex(c) // http://code.google.com/p/owasp-esapi-java/source/browse/trunk/src/main/java/org/owasp/esapi/codecs/Codec.java // See Integer.toHexString() // http://docs.oracle.com/javase/6/docs/api/java/lang/Integer.html#toHexString%28int%29 // http://stackoverflow.com/questions/3370004/what-is-static-block-in-c-or-c bool isAlphaNumeric(const unsigned short c) { if(c >= 0xFF) { // >= 256 need no further checking, it is not alphanumeric return false; } // alphanumeric: 0 - 9 || A - Z || a - z if ( c >= 0x30 && c <= 0x39 || c >= 0x41 && c <= 0x5A || c >= 0x61 && c <= 0x7A ) { return true; } // < 255, but not alphanumeric return false; }