summary refs log tree commit diff
path: root/mysys/base64.c
diff options
context:
space:
mode:
author Alexander Barkov <bar@mnogosearch.org> 2013-09-23 18:58:33 +0400
committer Alexander Barkov <bar@mnogosearch.org> 2013-09-23 18:58:33 +0400
commit e33582d20d2a9f215dc4d0effa55886bbabdce3d (patch)
tree eebea43211dfaefb954c40af5b72afe97019b572 /mysys/base64.c
parent 9cbd53bfb2e72376080a3951185e4780b0519718 (diff)
download mariadb-git-e33582d20d2a9f215dc4d0effa55886bbabdce3d.tar.gz
Merging TO_BASE64() and FROM_BASE64() from MySQL-5.6
Diffstat (limited to 'mysys/base64.c')
-rw-r--r-- mysys/base64.c 350
1 files changed, 247 insertions, 103 deletions
diff --git a/mysys/base64.c b/mysys/base64.c
index b48bcb85e03..88aab7a6450 100644
--- a/mysys/base64.c
+++ b/mysys/base64.c
@@ -1,5 +1,5 @@
-/* Copyright (c) 2003-2008 MySQL AB, 2009 Sun Microsystems, Inc.
- Use is subject to license terms.
+/* Copyright (c) 2003, 2010, Oracle and/or its affiliates.
+ Copyright (c) 2013, MariaDB Foundation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -25,6 +25,28 @@ static char base64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
+/**
+ * Maximum length base64_needed_encoded_length()
+ * can handle without signed integer overflow: (x + 2) / 3 * 4
+ */
+int
+base64_encode_max_arg_length()
+{
+#if (SIZEOF_INT == 8)
+ /*
+ base64_needed_encoded_length(6827690988321067803) -> 9223372036854775805 OK
+ base64_needed_encoded_length(6827690988321067804) -> -9223372036854775807 Overflow
+ */
+ return 0x5EC0D4C77B03531BLL; /* 6827690988321067803 */
+#else
+ /*
+ 1589695686 -> 2147483646 (7FFFFFFE)
+ 1589695687 -> -2147483645
+ */
+ return 0x5EC0D4C6; /* 1589695686 */
+#endif
+}
+
int
base64_needed_encoded_length(int length_of_data)
@@ -39,10 +61,24 @@ base64_needed_encoded_length(int length_of_data)
}
+/**
+ * Maximum length supported by base64_decode().
+ */
+int
+base64_decode_max_arg_length()
+{
+#if (SIZEOF_INT == 8)
+ return 0x7FFFFFFFFFFFFFFFLL;
+#else
+ return 0x7FFFFFFF;
+#endif
+}
+
+
int
base64_needed_decoded_length(int length_of_encoded_data)
{
- return (int) ceil(length_of_encoded_data * 3 / 4);
+ return (int) ((longlong) length_of_encoded_data + 3) / 4 * 3;
}
@@ -51,6 +87,11 @@ base64_needed_decoded_length(int length_of_encoded_data)
Note: We require that dst is pre-allocated to correct size.
See base64_needed_encoded_length().
+
+ Note: We add line separators every 76 characters.
+
+ Note: The output string is properly padded with the '=' character,
+ so the length of the output string is always divisible by 4.
*/
int
@@ -101,130 +142,233 @@ base64_encode(const void *src, size_t src_len, char *dst)
}
-static inline uint
-pos(unsigned char c)
+/*
+ Base64 decoder stream
+*/
+typedef struct my_base64_decoder_t
{
- return (uint) (strchr(base64_table, c) - base64_table);
-}
-
-
-#define SKIP_SPACE(src, i, size) \
-{ \
- while (i < size && my_isspace(&my_charset_latin1, * src)) \
- { \
- i++; \
- src++; \
- } \
- if (i == size) \
- { \
- break; \
- } \
-}
+ const char *src; /* Pointer to the current input position */
+ const char *end; /* Pointer to the end of input buffer */
+ uint c; /* Collect bits into this number */
+ int error; /* Error code */
+ uchar state; /* Character number in the current group of 4 */
+ uchar mark; /* Number of padding marks in the current group */
+} MY_BASE64_DECODER;
/*
- Decode a base64 string
-
- SYNOPSIS
- base64_decode()
- src Pointer to base64-encoded string
- len Length of string at 'src'
- dst Pointer to location where decoded data will be stored
- end_ptr Pointer to variable that will refer to the character
- after the end of the encoded data that were decoded. Can
- be NULL.
-
- DESCRIPTION
-
- The base64-encoded data in the range ['src','*end_ptr') will be
- decoded and stored starting at 'dst'. The decoding will stop
- after 'len' characters have been read from 'src', or when padding
- occurs in the base64-encoded data. In either case: if 'end_ptr' is
- non-null, '*end_ptr' will be set to point to the character after
- the last read character, even in the presence of error.
-
- NOTE
- We require that 'dst' is pre-allocated to correct size.
-
- SEE ALSO
- base64_needed_decoded_length().
-
- RETURN VALUE
- Number of bytes written at 'dst' or -1 in case of failure
+ Helper table for decoder.
+ -2 means "space character"
+ -1 means "bad character"
+ Non-negative values mean valid base64 encoding character.
*/
-int
-base64_decode(const char *src_base, size_t len,
- void *dst, const char **end_ptr)
+static int8
+from_base64_table[]=
{
- char b[3];
- size_t i= 0;
- char *dst_base= (char *)dst;
- char const *src= src_base;
- char *d= dst_base;
- size_t j;
-
- while (i < len)
+/*00*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-2,-2,-2,-2,-2,-1,-1,
+/*10*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*20*/ -2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, /* !"#$%&'()*+,-./ */
+/*30*/ 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, /* 0123456789:;<=>? */
+/*40*/ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, /* @ABCDEFGHIJKLMNO */
+/*50*/ 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, /* PQRSTUVWXYZ[\]^_ */
+/*60*/ -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, /* `abcdefghijklmno */
+/*70*/ 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, /* pqrstuvwxyz{|}~ */
+/*80*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*90*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*A0*/ -2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*B0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*C0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*D0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*E0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*F0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+};
+
+
+/**
+ * Skip leading spaces in a base64 encoded stream
+ * and stop on the first non-space character.
+ * decoder->src will point to the first non-space character,
+ * or to the end of the input string.
+ * If end-of-input is met in the middle of a group of 4 characters,
+ * decoder->error is also set to 1.
+ *
+ * See http://en.wikipedia.org/wiki/Base64 for the base64 encoding details
+ *
+ * @param decoder Pointer to MY_BASE64_DECODER
+ *
+ * @return
+ * FALSE on success (there are some more non-space input characters)
+ * TRUE on error (end-of-input found)
+ */
+
+static inline my_bool
+my_base64_decoder_skip_spaces(MY_BASE64_DECODER *decoder)
+{
+ for ( ; decoder->src < decoder->end; decoder->src++)
{
- unsigned c= 0;
- size_t mark= 0;
-
- SKIP_SPACE(src, i, len);
+ if (from_base64_table[(uchar) *decoder->src] != -2)
+ return FALSE;
+ }
+ if (decoder->state > 0)
+ decoder->error= 1; /* Unexpected end-of-input found */
+ return TRUE;
+}
- c += pos(*src++);
- c <<= 6;
- i++;
- SKIP_SPACE(src, i, len);
+/**
+ * Convert the next character in a base64 encoded stream
+ * to a number in the range [0..63]
+ * and mix it with the previously collected value in decoder->c.
+ *
+ * @param decoder Pointer to MY_BASE64_DECODER
+ *
+ * @return
+ * FALSE on success
+ * TRUE on error (invalid base64 character found)
+ */
+static inline my_bool
+my_base64_add(MY_BASE64_DECODER *decoder)
+{
+ int res;
+ decoder->c <<= 6;
+ if ((res= from_base64_table[(uchar) *decoder->src++]) < 0)
+ return (decoder->error= TRUE);
+ decoder->c+= (uint) res;
+ return FALSE;
+}
- c += pos(*src++);
- c <<= 6;
- i++;
- SKIP_SPACE(src, i, len);
+/**
+ * Get the next character from a base64 encoded stream.
+ * Skip spaces, then scan the next base64 character or a pad character
+ * and collect bits into decoder->c.
+ *
+ * @param decoder Pointer to MY_BASE64_DECODER
+ * @return
+ * FALSE on success (a valid base64 encoding character found)
+ * TRUE on error (unexpected character or unexpected end-of-input found)
+ */
+static my_bool
+my_base64_decoder_getch(MY_BASE64_DECODER *decoder)
+{
+ if (my_base64_decoder_skip_spaces(decoder))
+ return TRUE; /* End-of-input */
- if (*src != '=')
- c += pos(*src++);
- else
+ if (!my_base64_add(decoder)) /* Valid base64 character found */
+ {
+ if (decoder->mark)
{
- src += 2; /* There should be two bytes padding */
- i= len;
- mark= 2;
- c <<= 6;
- goto end;
+ /* If we have scanned '=' already, then only '=' is valid */
+ DBUG_ASSERT(decoder->state == 3);
+ decoder->error= 1;
+ decoder->src--;
+ return TRUE; /* expected '=', but encoding character found */
}
- c <<= 6;
- i++;
-
- SKIP_SPACE(src, i, len);
+ decoder->state++;
+ return FALSE;
+ }
- if (*src != '=')
- c += pos(*src++);
+ /* Process error */
+ switch (decoder->state)
+ {
+ case 0:
+ case 1:
+ decoder->src--;
+ return TRUE; /* base64 character expected */
+ break;
+
+ case 2:
+ case 3:
+ if (decoder->src[-1] == '=')
+ {
+ decoder->error= 0; /* Not an error - it's a pad character */
+ decoder->mark++;
+ }
else
{
- src += 1; /* There should be one byte padding */
- i= len;
- mark= 1;
- goto end;
+ decoder->src--;
+ return TRUE; /* base64 character or '=' expected */
}
- i++;
+ break;
+
+ default:
+ DBUG_ASSERT(0);
+ return TRUE; /* Wrong state, should not happen */
+ }
- end:
- b[0]= (c >> 16) & 0xff;
- b[1]= (c >> 8) & 0xff;
- b[2]= (c >> 0) & 0xff;
+ decoder->state++;
+ return FALSE;
+}
+
+
+/**
+ * Decode a base64 string
+ * The base64-encoded data in the range ['src','*end_ptr') will be
+ * decoded and stored starting at 'dst'. The decoding will stop
+ * after 'len' characters have been read from 'src', or when padding
+ * occurs (unless 'flags' allows multiple chunks). In either case, if
+ * 'end_ptr' is non-null, '*end_ptr' will point to the character after
+ * the last read character, even in the presence of error.
+ *
+ * Note: 'dst' must have sufficient space to store the decoded data.
+ * Use base64_needed_decoded_length() to calculate the correct space size.
+ *
+ * Note: we allow spaces and line separators at any position.
+ *
+ * @param src Pointer to base64-encoded string
+ * @param len Length of string at 'src'
+ * @param dst Pointer to location where decoded data will be stored
+ * @param end_ptr Pointer to variable that will refer to the character
+ * after the end of the encoded data that were decoded.
+ * Can be NULL.
+ * @param flags Flags, e.g. MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS
+ * @return Number of bytes written at 'dst', or -1 in case of failure
+ */
+int
+base64_decode(const char *src_base, size_t len,
+ void *dst, const char **end_ptr, int flags)
+{
+ char *d= (char*) dst;
+ MY_BASE64_DECODER decoder;
- for (j=0; j<3-mark; j++)
- *d++= b[j];
+ decoder.src= src_base;
+ decoder.end= src_base + len;
+ decoder.error= 0;
+ decoder.mark= 0;
+
+ for ( ; ; )
+ {
+ decoder.c= 0;
+ decoder.state= 0;
+
+ if (my_base64_decoder_getch(&decoder) ||
+ my_base64_decoder_getch(&decoder) ||
+ my_base64_decoder_getch(&decoder) ||
+ my_base64_decoder_getch(&decoder))
+ break;
+
+ *d++= (decoder.c >> 16) & 0xff;
+ *d++= (decoder.c >> 8) & 0xff;
+ *d++= (decoder.c >> 0) & 0xff;
+
+ if (decoder.mark)
+ {
+ d-= decoder.mark;
+ if (!(flags & MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS))
+ break;
+ decoder.mark= 0;
+ }
}
+ /* Return error if there are more non-space characters */
+ decoder.state= 0;
+ if (!my_base64_decoder_skip_spaces(&decoder))
+ decoder.error= 1;
+
if (end_ptr != NULL)
- *end_ptr= src;
+ *end_ptr= decoder.src;
- /*
- The variable 'i' is set to 'len' when padding has been read, so it
- does not actually reflect the number of bytes read from 'src'.
- */
- return i != len ? -1 : (int) (d - dst_base);
+ return decoder.error ? -1 : (int) (d - (char*) dst);
}