/* ESAPI-C 1.0
   The OWASP Enterprise Security API for C */
00001 00033 /* base64.c -- Encode binary data using printable characters. 00034 Copyright (C) 1999, 2000, 2001, 2004, 2005, 2006 Free Software 00035 Foundation, Inc. 00036 00037 This program is free software; you can redistribute it and/or modify 00038 it under the terms of the GNU General Public License as published by 00039 the Free Software Foundation; either version 2, or (at your option) 00040 any later version. 00041 00042 This program is distributed in the hope that it will be useful, 00043 but WITHOUT ANY WARRANTY; without even the implied warranty of 00044 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00045 GNU General Public License for more details. 00046 00047 You should have received a copy of the GNU General Public License 00048 along with this program; if not, write to the Free Software Foundation, 00049 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 00050 00051 /* 00052 #include <config.h> 00053 */ 00054 00055 /* Get prototype. */ 00056 #include "base64.h" 00057 00058 /* Get malloc. */ 00059 #include <stdlib.h> 00060 00061 /* Get UCHAR_MAX. */ 00062 #include <limits.h> 00063 00064 /* C89 compliant way to cast 'char' to 'unsigned char'. */ 00065 static inline unsigned char to_uchar(char ch) { 00066 return ch; 00067 } 00068 00069 /* Base64 encode IN array of size INLEN into OUT array of size OUTLEN. 00070 If OUTLEN is less than BASE64_LENGTH(INLEN), write as many bytes as 00071 possible. If OUTLEN is larger than BASE64_LENGTH(INLEN), also zero 00072 terminate the output buffer. */ 00073 void base64_encode(const char *in, size_t inlen, char *out, size_t outlen) { 00074 static const char b64str[64] = 00075 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 00076 00077 while (inlen && outlen) { 00078 *out++ = b64str[(to_uchar(in[0]) >> 2) & 0x3f]; 00079 if (!--outlen) 00080 break; 00081 *out++ = b64str[((to_uchar(in[0]) << 4) + (--inlen ? 
to_uchar(in[1]) 00082 >> 4 : 0)) & 0x3f]; 00083 if (!--outlen) 00084 break; 00085 *out++ = (inlen ? b64str[((to_uchar(in[1]) << 2) + (--inlen ? to_uchar( 00086 in[2]) >> 6 : 0)) & 0x3f] : '='); 00087 if (!--outlen) 00088 break; 00089 *out++ = inlen ? b64str[to_uchar(in[2]) & 0x3f] : '='; 00090 if (!--outlen) 00091 break; 00092 if (inlen) 00093 inlen--; 00094 if (inlen) 00095 in += 3; 00096 } 00097 00098 if (outlen) 00099 *out = '\0'; 00100 } 00101 00102 /* Allocate a buffer and store zero terminated base64 encoded data 00103 from array IN of size INLEN, returning BASE64_LENGTH(INLEN), i.e., 00104 the length of the encoded data, excluding the terminating zero. On 00105 return, the OUT variable will hold a pointer to newly allocated 00106 memory that must be deallocated by the caller. If output string 00107 length would overflow, 0 is returned and OUT is set to NULL. If 00108 memory allocation failed, OUT is set to NULL, and the return value 00109 indicates length of the requested memory block, i.e., 00110 BASE64_LENGTH(inlen) + 1. */ 00111 size_t base64_encode_alloc(const char *in, size_t inlen, char **out) { 00112 size_t outlen = 1 + BASE64_LENGTH (inlen); 00113 00114 /* Check for overflow in outlen computation. 00115 * 00116 * If there is no overflow, outlen >= inlen. 00117 * 00118 * If the operation (inlen + 2) overflows then it yields at most +1, so 00119 * outlen is 0. 00120 * 00121 * If the multiplication overflows, we lose at least half of the 00122 * correct value, so the result is < ((inlen + 2) / 3) * 2, which is 00123 * less than (inlen + 2) * 0.66667, which is less than inlen as soon as 00124 * (inlen > 4). 00125 */ 00126 if (inlen > outlen) { 00127 *out = NULL; 00128 return 0; 00129 } 00130 00131 *out = malloc(outlen); 00132 if (!*out) 00133 return outlen; 00134 00135 base64_encode(in, inlen, *out, outlen); 00136 00137 return outlen - 1; 00138 } 00139 00140 /* With this approach this file works independent of the charset used 00141 (think EBCDIC). 
However, it does assume that the characters in the 00142 Base64 alphabet (A-Za-z0-9+/) are encoded in 0..255. POSIX 00143 1003.1-2001 require that char and unsigned char are 8-bit 00144 quantities, though, taking care of that problem. But this may be a 00145 potential problem on non-POSIX C99 platforms. 00146 00147 IBM C V6 for AIX mishandles "#define B64(x) ...'x'...", so use "_" 00148 as the formal parameter rather than "x". */ 00149 #define B64(_) \ 00150 ((_) == 'A' ? 0 \ 00151 : (_) == 'B' ? 1 \ 00152 : (_) == 'C' ? 2 \ 00153 : (_) == 'D' ? 3 \ 00154 : (_) == 'E' ? 4 \ 00155 : (_) == 'F' ? 5 \ 00156 : (_) == 'G' ? 6 \ 00157 : (_) == 'H' ? 7 \ 00158 : (_) == 'I' ? 8 \ 00159 : (_) == 'J' ? 9 \ 00160 : (_) == 'K' ? 10 \ 00161 : (_) == 'L' ? 11 \ 00162 : (_) == 'M' ? 12 \ 00163 : (_) == 'N' ? 13 \ 00164 : (_) == 'O' ? 14 \ 00165 : (_) == 'P' ? 15 \ 00166 : (_) == 'Q' ? 16 \ 00167 : (_) == 'R' ? 17 \ 00168 : (_) == 'S' ? 18 \ 00169 : (_) == 'T' ? 19 \ 00170 : (_) == 'U' ? 20 \ 00171 : (_) == 'V' ? 21 \ 00172 : (_) == 'W' ? 22 \ 00173 : (_) == 'X' ? 23 \ 00174 : (_) == 'Y' ? 24 \ 00175 : (_) == 'Z' ? 25 \ 00176 : (_) == 'a' ? 26 \ 00177 : (_) == 'b' ? 27 \ 00178 : (_) == 'c' ? 28 \ 00179 : (_) == 'd' ? 29 \ 00180 : (_) == 'e' ? 30 \ 00181 : (_) == 'f' ? 31 \ 00182 : (_) == 'g' ? 32 \ 00183 : (_) == 'h' ? 33 \ 00184 : (_) == 'i' ? 34 \ 00185 : (_) == 'j' ? 35 \ 00186 : (_) == 'k' ? 36 \ 00187 : (_) == 'l' ? 37 \ 00188 : (_) == 'm' ? 38 \ 00189 : (_) == 'n' ? 39 \ 00190 : (_) == 'o' ? 40 \ 00191 : (_) == 'p' ? 41 \ 00192 : (_) == 'q' ? 42 \ 00193 : (_) == 'r' ? 43 \ 00194 : (_) == 's' ? 44 \ 00195 : (_) == 't' ? 45 \ 00196 : (_) == 'u' ? 46 \ 00197 : (_) == 'v' ? 47 \ 00198 : (_) == 'w' ? 48 \ 00199 : (_) == 'x' ? 49 \ 00200 : (_) == 'y' ? 50 \ 00201 : (_) == 'z' ? 51 \ 00202 : (_) == '0' ? 52 \ 00203 : (_) == '1' ? 53 \ 00204 : (_) == '2' ? 54 \ 00205 : (_) == '3' ? 55 \ 00206 : (_) == '4' ? 56 \ 00207 : (_) == '5' ? 57 \ 00208 : (_) == '6' ? 
58 \ 00209 : (_) == '7' ? 59 \ 00210 : (_) == '8' ? 60 \ 00211 : (_) == '9' ? 61 \ 00212 : (_) == '+' ? 62 \ 00213 : (_) == '/' ? 63 \ 00214 : -1) 00215 00216 static const signed char b64[0x100] = { B64 (0), B64 (1), B64 (2), B64 (3), 00217 B64 (4), B64 (5), B64 (6), B64 (7), B64 (8), B64 (9), B64 (10), 00218 B64 (11), B64 (12), B64 (13), B64 (14), B64 (15), B64 (16), 00219 B64 (17), B64 (18), B64 (19), B64 (20), B64 (21), B64 (22), 00220 B64 (23), B64 (24), B64 (25), B64 (26), B64 (27), B64 (28), 00221 B64 (29), B64 (30), B64 (31), B64 (32), B64 (33), B64 (34), 00222 B64 (35), B64 (36), B64 (37), B64 (38), B64 (39), B64 (40), 00223 B64 (41), B64 (42), B64 (43), B64 (44), B64 (45), B64 (46), 00224 B64 (47), B64 (48), B64 (49), B64 (50), B64 (51), B64 (52), 00225 B64 (53), B64 (54), B64 (55), B64 (56), B64 (57), B64 (58), 00226 B64 (59), B64 (60), B64 (61), B64 (62), B64 (63), B64 (64), 00227 B64 (65), B64 (66), B64 (67), B64 (68), B64 (69), B64 (70), 00228 B64 (71), B64 (72), B64 (73), B64 (74), B64 (75), B64 (76), 00229 B64 (77), B64 (78), B64 (79), B64 (80), B64 (81), B64 (82), 00230 B64 (83), B64 (84), B64 (85), B64 (86), B64 (87), B64 (88), 00231 B64 (89), B64 (90), B64 (91), B64 (92), B64 (93), B64 (94), 00232 B64 (95), B64 (96), B64 (97), B64 (98), B64 (99), B64 (100), 00233 B64 (101), B64 (102), B64 (103), B64 (104), B64 (105), B64 (106), 00234 B64 (107), B64 (108), B64 (109), B64 (110), B64 (111), B64 (112), 00235 B64 (113), B64 (114), B64 (115), B64 (116), B64 (117), B64 (118), 00236 B64 (119), B64 (120), B64 (121), B64 (122), B64 (123), B64 (124), 00237 B64 (125), B64 (126), B64 (127), B64 (128), B64 (129), B64 (130), 00238 B64 (131), B64 (132), B64 (133), B64 (134), B64 (135), B64 (136), 00239 B64 (137), B64 (138), B64 (139), B64 (140), B64 (141), B64 (142), 00240 B64 (143), B64 (144), B64 (145), B64 (146), B64 (147), B64 (148), 00241 B64 (149), B64 (150), B64 (151), B64 (152), B64 (153), B64 (154), 00242 B64 (155), B64 (156), B64 (157), B64 (158), B64 
(159), B64 (160), 00243 B64 (161), B64 (162), B64 (163), B64 (164), B64 (165), B64 (166), 00244 B64 (167), B64 (168), B64 (169), B64 (170), B64 (171), B64 (172), 00245 B64 (173), B64 (174), B64 (175), B64 (176), B64 (177), B64 (178), 00246 B64 (179), B64 (180), B64 (181), B64 (182), B64 (183), B64 (184), 00247 B64 (185), B64 (186), B64 (187), B64 (188), B64 (189), B64 (190), 00248 B64 (191), B64 (192), B64 (193), B64 (194), B64 (195), B64 (196), 00249 B64 (197), B64 (198), B64 (199), B64 (200), B64 (201), B64 (202), 00250 B64 (203), B64 (204), B64 (205), B64 (206), B64 (207), B64 (208), 00251 B64 (209), B64 (210), B64 (211), B64 (212), B64 (213), B64 (214), 00252 B64 (215), B64 (216), B64 (217), B64 (218), B64 (219), B64 (220), 00253 B64 (221), B64 (222), B64 (223), B64 (224), B64 (225), B64 (226), 00254 B64 (227), B64 (228), B64 (229), B64 (230), B64 (231), B64 (232), 00255 B64 (233), B64 (234), B64 (235), B64 (236), B64 (237), B64 (238), 00256 B64 (239), B64 (240), B64 (241), B64 (242), B64 (243), B64 (244), 00257 B64 (245), B64 (246), B64 (247), B64 (248), B64 (249), B64 (250), 00258 B64 (251), B64 (252), B64 (253), B64 (254), B64 (255) }; 00259 00260 #if UCHAR_MAX == 255 00261 # define uchar_in_range(c) true 00262 #else 00263 # define uchar_in_range(c) ((c) <= 255) 00264 #endif 00265 00266 /* Return true if CH is a character from the Base64 alphabet, and 00267 false otherwise. Note that '=' is padding and not considered to be 00268 part of the alphabet. */ 00269 bool isbase64(char ch) { 00270 return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar(ch)]; 00271 } 00272 00273 /* Decode base64 encoded input array IN of length INLEN to output 00274 array OUT that can hold *OUTLEN bytes. Return true if decoding was 00275 successful, i.e. if the input was valid base64 data, false 00276 otherwise. If *OUTLEN is too small, as many bytes as possible will 00277 be written to OUT. On return, *OUTLEN holds the length of decoded 00278 bytes in OUT. 
Note that as soon as any non-alphabet characters are 00279 encountered, decoding is stopped and false is returned. This means 00280 that, when applicable, you must remove any line terminators that is 00281 part of the data stream before calling this function. */ 00282 bool base64_decode(const char *in, size_t inlen, char *out, size_t *outlen) { 00283 size_t outleft = *outlen; 00284 00285 while (inlen >= 2) { 00286 if (!isbase64(in[0]) || !isbase64(in[1])) 00287 break; 00288 00289 if (outleft) { 00290 *out++ 00291 = ((b64[to_uchar(in[0])] << 2) 00292 | (b64[to_uchar(in[1])] >> 4)); 00293 outleft--; 00294 } 00295 00296 if (inlen == 2) 00297 break; 00298 00299 if (in[2] == '=') { 00300 if (inlen != 4) 00301 break; 00302 00303 if (in[3] != '=') 00304 break; 00305 00306 } else { 00307 if (!isbase64(in[2])) 00308 break; 00309 00310 if (outleft) { 00311 *out++ = (((b64[to_uchar(in[1])] << 4) & 0xf0) | (b64[to_uchar( 00312 in[2])] >> 2)); 00313 outleft--; 00314 } 00315 00316 if (inlen == 3) 00317 break; 00318 00319 if (in[3] == '=') { 00320 if (inlen != 4) 00321 break; 00322 } else { 00323 if (!isbase64(in[3])) 00324 break; 00325 00326 if (outleft) { 00327 *out++ = (((b64[to_uchar(in[2])] << 6) & 0xc0) 00328 | b64[to_uchar(in[3])]); 00329 outleft--; 00330 } 00331 } 00332 } 00333 00334 in += 4; 00335 inlen -= 4; 00336 } 00337 00338 *outlen -= outleft; 00339 00340 if (inlen != 0) 00341 return false; 00342 00343 return true; 00344 } 00345 00346 /* Allocate an output buffer in *OUT, and decode the base64 encoded 00347 data stored in IN of size INLEN to the *OUT buffer. On return, the 00348 size of the decoded data is stored in *OUTLEN. OUTLEN may be NULL, 00349 if the caller is not interested in the decoded length. *OUT may be 00350 NULL to indicate an out of memory error, in which case *OUTLEN 00351 contains the size of the memory block needed. The function returns 00352 true on successful decoding and memory allocation errors. 
(Use the 00353 *OUT and *OUTLEN parameters to differentiate between successful 00354 decoding and memory error.) The function returns false if the 00355 input was invalid, in which case *OUT is NULL and *OUTLEN is 00356 undefined. */ 00357 bool base64_decode_alloc(const char *in, size_t inlen, char **out, 00358 size_t *outlen) { 00359 /* This may allocate a few bytes too much, depending on input, 00360 but it's not worth the extra CPU time to compute the exact amount. 00361 The exact amount is 3 * inlen / 4, minus 1 if the input ends 00362 with "=" and minus another 1 if the input ends with "==". 00363 Dividing before multiplying avoids the possibility of overflow. */ 00364 size_t needlen = 3 * (inlen / 4) + 2; 00365 00366 *out = malloc(needlen); 00367 if (!*out) 00368 return true; 00369 00370 if (!base64_decode(in, inlen, *out, &needlen)) { 00371 free(*out); 00372 *out = NULL; 00373 return false; 00374 } 00375 00376 if (outlen) 00377 *outlen = needlen; 00378 00379 return true; 00380 } 00381