ESAPI-C 1.0
The OWASP Enterprise Security API for C

canonicalize.c

Go to the documentation of this file.
00001 
00013 #include <stdio.h>
00014 #include <stdlib.h>
00015 #include <string.h>
00016 #include <stdbool.h>
00017 #include "codec.h"
00018 #include "canonicalize.h"
00019 
00020 #define GROW_LEN 32
00021 
00022 // Encode algorithm:
00023 // We must decode atoms and encode characters
00024 
00025 //char decode_char(char *input, int *index, char *pushback) {
00026 //      return next(input, index, pushback);
00027 //}
00028 
00029 char *straight_port_decode(codec *c, char *input) {
00030 //      printf("Begin straight_port_decode\n");
00031         char pushback = -1;
00032         int bufflen = strlen(input)+1;
00033         char *buff = (char *) malloc(bufflen);
00034         int buffpos = 0;
00035         int index = 0;
00036 
00037            /* The function pointer needed to canonicalize for the given codec. */
00038            char (*decode_char)(const char *, int *, char *) = c->decode_char;
00039 
00040         int backstop = 0;
00041         while (hasNext(input, index, pushback)) {
00042                 if (backstop++ > 30) exit(-1);
00043                 char ch = decode_char(input, &index, &pushback);
00044 //                 printf("Decoded character: %c\n", ch);
00045               if ( buffpos + 1 > bufflen ) {
00046 //                              printf("Resizing output buffer\n");
00047                   bufflen = buffpos + 1 + GROW_LEN;
00048                  buff = realloc(buff, bufflen);
00049               }
00050 
00051                  if (ch == -1) {
00052                          ch = next(input, &index, &pushback);
00053                  }
00054 //                      printf("Appending decoded character %c to %s at offset %d\n", ch, buff, buffpos);
00055                  buff[buffpos] = ch;
00056                  buffpos++;
00057                  buff[buffpos] = '\0';
00058         }
00059 
00060 //      printf("End straight_port_decode returning value %s\n", buff);
00061         return buff;
00062 }
00063 
00064 //char encode_char(char *output, char c) {
00065 //      printf("Encoding character: %c\n", c);
00066 //      output[0] = c;
00067 //      output[1] = '\0';
00068 //
00069 //      return output;
00070 //}
00071 
00072 char *straight_port_encode(codec *c, char *input) {
00073 //      printf("Begin straight_port_encode\n");
00074         int bufflen = strlen(input)+1;
00075         char *buff = (char *) malloc(bufflen);
00076         char *tokenbuff = (char *) malloc(32);
00077         int tokenlen = 0;
00078         int buffpos = 0;
00079 
00080            /* The function pointer needed to canonicalize for the given codec. */
00081            char *(*encode_char)(char *, char) = c->encode_char;
00082 
00083         int inputlen = strlen(input);
00084         int i;
00085         for (i = 0; i < inputlen; i++) {
00086                         char ch = input[i];
00087                    // FIXME: We could pass in a set of immune (unencodable) characters here
00088                         encode_char(tokenbuff, ch);
00089               tokenlen = strlen(tokenbuff);
00090               if ( buffpos + tokenlen + 1 > bufflen ) {
00091 //                      printf("Resizing output buffer\n");
00092                   bufflen = buffpos + tokenlen + 1 + GROW_LEN;
00093                  buff = realloc(buff, bufflen);
00094               }
00095 
00096 //              printf("Appending encoded token %s to %s at offset %d\n", tokenbuff, buff, buffpos);
00097               memcpy(buff+buffpos, tokenbuff, tokenlen);
00098               buffpos += tokenlen;
00099               buff[buffpos] = '\0';
00100         }
00101 
00102 //      printf("End straight_port_encode returning value %s\n", buff);
00103         return buff;
00104 }
00105 
00106 
00107 
00108 char *_codec_decode(codec *c, char *s) {
00109         return straight_port_decode(c, s);
00110 }
00111 
00112 char *_codec_encode(codec *c, char *s) {
00113         return straight_port_encode(c, s);
00114 }
00115 
00116 /*
00117  * Decode an input according to an array of codecs. It's very likely that
00118  * the order of the codecs matters.
00119  */
00120 char *esapi_canonicalize(const char *input, codec *codecs, int codec_count, bool strict) {
00121         if ( input == NULL ) {
00122                 return NULL;
00123         }
00124 
00125         codec *current_codec;
00126         //codec *codecs = esapi_get_codecs();
00127         //int codec_count = esapi_get_codec_count();
00128 
00129 
00130         char *working = (char *)input;
00131         codec *codec_found = NULL;
00132         int codec_found_count = 0;
00133         int codec_mixed_count = 1;
00134         bool clean = false;
00135         while( !clean ) {
00136                 clean = true;
00137 
00138                 // try each codec and keep track of which ones work
00139                 int i, offset;
00140                 char *old;
00141                 for(i = 0; i < codec_count; i++) {
00142                         offset = sizeof(struct codec *) * i;
00143                         current_codec = codecs + offset;
00144                         old = working;
00145                         working = _codec_decode( current_codec, working );
00146                         if ( strcmp(old, working ) != 0 ) {
00147                                 if ( codec_found != NULL && codec_found != current_codec ) {
00148                                         codec_mixed_count++;
00149                                 }
00150                                 codec_found = current_codec;
00151                                 if ( clean ) {
00152                                         codec_found_count++;
00153                                 }
00154                                 clean = false;
00155                         }
00156                 }
00157         }
00158 
00159         // do strict tests and handle if any mixed, multiple, nested encoding were found
00160         if ( codec_found_count >= 2 && codec_mixed_count > 1 ) {
00161                 if ( strict ) {
00162                     fprintf(stderr, "Input validation failure: Multiple (%dx) and mixed encoding (%dx) detected in %s\n", codec_found_count, codec_mixed_count, __func__);
00163                     return NULL;
00164 //                      throw new IntrusionException( "Input validation failure", "Multiple ("+ codec_found_count +"x) and mixed encoding ("+ codec_mixed_count +"x) detected in " + input );
00165                 } else {
00166                     fprintf(stderr, "Multiple (%dx) and mixed encoding (%dx) detected in %s\n", codec_found_count, codec_mixed_count, __func__);
00167 //                      logger.warning( Logger.SECURITY_FAILURE, "Multiple ("+ codec_found_count +"x) and mixed encoding ("+ codec_mixed_count +"x) detected in " + input );
00168                 }
00169         }
00170         else if ( codec_found_count >= 2 ) {
00171                 if ( strict ) {
00172                     fprintf(stderr, "Input validation failure: Multiple encoding (%dx) detected in %s\n", codec_found_count, __func__);
00173                     return NULL;
00174 //                      throw new IntrusionException( "Input validation failure", "Multiple ("+ codec_found_count +"x) encoding detected in " + input );
00175                 } else {
00176                     fprintf(stderr, "Multiple encoding (%dx) detected in %s\n", codec_found_count, __func__);
00177 //                      logger.warning( Logger.SECURITY_FAILURE, "Multiple ("+ codec_found_count +"x) encoding detected in " + input );
00178                 }
00179         }
00180         else if ( codec_mixed_count > 1 ) {
00181                 if ( strict ) {
00182                     fprintf(stderr, "Input validation failure: Mixed encoding (%dx) detected in %s\n", codec_mixed_count, __func__);
00183                     return NULL;
00184 //                      throw new IntrusionException( "Input validation failure", "Mixed encoding ("+ codec_mixed_count +"x) detected in " + input );
00185                 } else {
00186                     fprintf(stderr, "Mixed encoding (%dx) detected in %s\n", codec_mixed_count, __func__);
00187 //                      logger.warning( Logger.SECURITY_FAILURE, "Mixed encoding ("+ codec_mixed_count +"x) detected in " + input );
00188                 }
00189         }
00190         return working;
00191 }
00192 
 All Data Structures Files Functions Variables Typedefs Defines