diff options
author | Alexander Barkov <bar@mnogosearch.org> | 2013-09-23 18:58:33 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mnogosearch.org> | 2013-09-23 18:58:33 +0400 |
commit | e33582d20d2a9f215dc4d0effa55886bbabdce3d (patch) | |
tree | eebea43211dfaefb954c40af5b72afe97019b572 /mysys/base64.c | |
parent | 9cbd53bfb2e72376080a3951185e4780b0519718 (diff) | |
download | mariadb-git-e33582d20d2a9f215dc4d0effa55886bbabdce3d.tar.gz |
Merging TO_BASE64() and FROM_BASE64() from MySQL-5.6
Diffstat (limited to 'mysys/base64.c')
-rw-r--r-- | mysys/base64.c | 350 |
1 files changed, 247 insertions, 103 deletions
diff --git a/mysys/base64.c b/mysys/base64.c index b48bcb85e03..88aab7a6450 100644 --- a/mysys/base64.c +++ b/mysys/base64.c @@ -1,5 +1,5 @@ -/* Copyright (c) 2003-2008 MySQL AB, 2009 Sun Microsystems, Inc. - Use is subject to license terms. +/* Copyright (c) 2003, 2010, Oracle and/or its affiliates. + Copyright (c) 2013, MariaDB Foundation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +25,28 @@ static char base64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789+/"; +/** + * Maximum length base64_needed_encoded_length() + * can handle without signed integer overflow: (x + 2) / 3 * 4 + */ +int +base64_encode_max_arg_length() +{ +#if (SIZEOF_INT == 8) + /* + (6827690988321067803 + 2) / 3 + 4 -> 9223372036854775805 Okay + (6827690988321067804 + 2) / 3 + 4 -> -9223372036854775807 Overflow + */ + return 0x5EC0D4C77B03531BLL; /* 6827690988321067803 */ +#else + /* + 1589695686 -> 2147483646 (7FFFFFFE) + 1589695687 -> -2147483645 + */ + return 0x5EC0D4C6; /* 1589695686 */ +#endif +} + int base64_needed_encoded_length(int length_of_data) @@ -39,10 +61,24 @@ base64_needed_encoded_length(int length_of_data) } +/** + * Maximum length supported by base64_decode(). + */ +int +base64_decode_max_arg_length() +{ +#if (SIZEOF_INT == 8) + return 0x7FFFFFFFFFFFFFFFLL; +#else + return 0x7FFFFFFF; +#endif +} + + int base64_needed_decoded_length(int length_of_encoded_data) { - return (int) ceil(length_of_encoded_data * 3 / 4); + return (int) ((longlong) length_of_encoded_data + 3) / 4 * 3; } @@ -51,6 +87,11 @@ base64_needed_decoded_length(int length_of_encoded_data) Note: We require that dst is pre-allocated to correct size. See base64_needed_encoded_length(). + + Note: We add line separators every 76 characters. + + Note: The output string is properly padded with the '=' character, + so the length of the output string is always divisible by 4. 
*/ int @@ -101,130 +142,233 @@ base64_encode(const void *src, size_t src_len, char *dst) } -static inline uint -pos(unsigned char c) +/* + Base64 decoder stream +*/ +typedef struct my_base64_decoder_t { - return (uint) (strchr(base64_table, c) - base64_table); -} - - -#define SKIP_SPACE(src, i, size) \ -{ \ - while (i < size && my_isspace(&my_charset_latin1, * src)) \ - { \ - i++; \ - src++; \ - } \ - if (i == size) \ - { \ - break; \ - } \ -} + const char *src; /* Pointer to the current input position */ + const char *end; /* Pointer to the end of input buffer */ + uint c; /* Collect bits into this number */ + int error; /* Error code */ + uchar state; /* Character number in the current group of 4 */ + uchar mark; /* Number of padding marks in the current group */ +} MY_BASE64_DECODER; /* - Decode a base64 string - - SYNOPSIS - base64_decode() - src Pointer to base64-encoded string - len Length of string at 'src' - dst Pointer to location where decoded data will be stored - end_ptr Pointer to variable that will refer to the character - after the end of the encoded data that were decoded. Can - be NULL. - - DESCRIPTION - - The base64-encoded data in the range ['src','*end_ptr') will be - decoded and stored starting at 'dst'. The decoding will stop - after 'len' characters have been read from 'src', or when padding - occurs in the base64-encoded data. In either case: if 'end_ptr' is - non-null, '*end_ptr' will be set to point to the character after - the last read character, even in the presence of error. - - NOTE - We require that 'dst' is pre-allocated to correct size. - - SEE ALSO - base64_needed_decoded_length(). - - RETURN VALUE - Number of bytes written at 'dst' or -1 in case of failure + Helper table for decoder. + -2 means "space character" + -1 means "bad character" + Non-negative values mean valid base64 encoding character. 
*/ -int -base64_decode(const char *src_base, size_t len, - void *dst, const char **end_ptr) +static int8 +from_base64_table[]= { - char b[3]; - size_t i= 0; - char *dst_base= (char *)dst; - char const *src= src_base; - char *d= dst_base; - size_t j; - - while (i < len) +/*00*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-2,-2,-2,-2,-2,-1,-1, +/*10*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*20*/ -2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, /* !"#$%&'()*+,-./ */ +/*30*/ 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, /* 0123456789:;<=>? */ +/*40*/ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, /* @ABCDEFGHIJKLMNO */ +/*50*/ 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, /* PQRSTUVWXYZ[\]^_ */ +/*60*/ -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, /* `abcdefghijklmno */ +/*70*/ 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, /* pqrstuvwxyz{|}~ */ +/*80*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*90*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*A0*/ -2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*B0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*C0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*D0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*E0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +/*F0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 +}; + + +/** + * Skip leading spaces in a base64 encoded stream + * and stop on the first non-space character. + * decoder->src will point to the first non-space character, + * or to the end of the input string. + * In case when end-of-input met on unexpected position, + * decoder->error is also set to 1. 
+ * + * See http://en.wikipedia.org/wiki/Base64 for the base64 encoding details + * + * @param decoder Pointer to MY_BASE64_DECODER + * + * @return + * FALSE on success (there are some more non-space input characters) + * TRUE on error (end-of-input found) + */ + +static inline my_bool +my_base64_decoder_skip_spaces(MY_BASE64_DECODER *decoder) +{ + for ( ; decoder->src < decoder->end; decoder->src++) { - unsigned c= 0; - size_t mark= 0; - - SKIP_SPACE(src, i, len); + if (from_base64_table[(uchar) *decoder->src] != -2) + return FALSE; + } + if (decoder->state > 0) + decoder->error= 1; /* Unexpected end-of-input found */ + return TRUE; +} - c += pos(*src++); - c <<= 6; - i++; - SKIP_SPACE(src, i, len); +/** + * Convert the next character in a base64 encoded stream + * to a number in the range [0..63] + * and mix it with the previously collected value in decoder->c. + * + * @param decoder base64 decoding stream + * + * @return + * FALSE on success + * TRUE on error (invalid base64 character found) + */ +static inline my_bool +my_base64_add(MY_BASE64_DECODER *decoder) +{ + int res; + decoder->c <<= 6; + if ((res= from_base64_table[(uchar) *decoder->src++]) < 0) + return (decoder->error= TRUE); + decoder->c+= (uint) res; + return FALSE; +} - c += pos(*src++); - c <<= 6; - i++; - SKIP_SPACE(src, i, len); +/** + * Get the next character from a base64 encoded stream. + * Skip spaces, then scan the next base64 character or a pad character + * and collect bits into decoder->c. 
+ * + * @param decoder Pointer to MY_BASE64_DECODER + * @return + * FALSE on success (a valid base64 encoding character found) + * TRUE on error (unexpected character or unexpected end-of-input found) + */ +static my_bool +my_base64_decoder_getch(MY_BASE64_DECODER *decoder) +{ + if (my_base64_decoder_skip_spaces(decoder)) + return TRUE; /* End-of-input */ - if (*src != '=') - c += pos(*src++); - else + if (!my_base64_add(decoder)) /* Valid base64 character found */ + { + if (decoder->mark) { - src += 2; /* There should be two bytes padding */ - i= len; - mark= 2; - c <<= 6; - goto end; + /* If we have scanned '=' already, then only '=' is valid */ + DBUG_ASSERT(decoder->state == 3); + decoder->error= 1; + decoder->src--; + return TRUE; /* expected '=', but encoding character found */ } - c <<= 6; - i++; - - SKIP_SPACE(src, i, len); + decoder->state++; + return FALSE; + } - if (*src != '=') - c += pos(*src++); + /* Process error */ + switch (decoder->state) + { + case 0: + case 1: + decoder->src--; + return TRUE; /* base64 character expected */ + break; + + case 2: + case 3: + if (decoder->src[-1] == '=') + { + decoder->error= 0; /* Not an error - it's a pad character */ + decoder->mark++; + } else { - src += 1; /* There should be one byte padding */ - i= len; - mark= 1; - goto end; + decoder->src--; + return TRUE; /* base64 character or '=' expected */ } - i++; + break; + + default: + DBUG_ASSERT(0); + return TRUE; /* Wrong state, should not happen */ + } - end: - b[0]= (c >> 16) & 0xff; - b[1]= (c >> 8) & 0xff; - b[2]= (c >> 0) & 0xff; + decoder->state++; + return FALSE; +} + + +/** + * Decode a base64 string + * The base64-encoded data in the range ['src','*end_ptr') will be + * decoded and stored starting at 'dst'. The decoding will stop + * after 'len' characters have been read from 'src', or when padding + * occurs in the base64-encoded data. 
In either case: if 'end_ptr' is + * non-null, '*end_ptr' will be set to point to the character after + * the last read character, even in the presence of error. + * + * Note: 'dst' must have sufficient space to store the decoded data. + * Use base64_needed_decoded_length() to calculate the correct space size. + * + * Note: we allow spaces and line separators at any position. + * + * @param src Pointer to base64-encoded string + * @param len Length of string at 'src' + * @param dst Pointer to location where decoded data will be stored + * @param end_ptr Pointer to variable that will refer to the character + * after the end of the encoded data that were decoded. + * Can be NULL. + * @param flags Flags, e.g. MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS to allow multiple chunks + * @return Number of bytes written at 'dst', or -1 in case of failure + */ +int +base64_decode(const char *src_base, size_t len, + void *dst, const char **end_ptr, int flags) +{ + char *d= (char*) dst; + MY_BASE64_DECODER decoder; - for (j=0; j<3-mark; j++) - *d++= b[j]; + decoder.src= src_base; + decoder.end= src_base + len; + decoder.error= 0; + decoder.mark= 0; + + for ( ; ; ) + { + decoder.c= 0; + decoder.state= 0; + + if (my_base64_decoder_getch(&decoder) || + my_base64_decoder_getch(&decoder) || + my_base64_decoder_getch(&decoder) || + my_base64_decoder_getch(&decoder)) + break; + + *d++= (decoder.c >> 16) & 0xff; + *d++= (decoder.c >> 8) & 0xff; + *d++= (decoder.c >> 0) & 0xff; + + if (decoder.mark) + { + d-= decoder.mark; + if (!(flags & MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS)) + break; + decoder.mark= 0; + } } + /* Return error if there are more non-space characters */ + decoder.state= 0; + if (!my_base64_decoder_skip_spaces(&decoder)) + decoder.error= 1; + if (end_ptr != NULL) - *end_ptr= src; + *end_ptr= decoder.src; - /* - The variable 'i' is set to 'len' when padding has been read, so it - does not actually reflect the number of bytes read from 'src'. - */ - return i != len ? 
-1 : (int) (d - (char*) dst); } |