Merging TO_BASE64() and FROM_BASE64() from MySQL-5.6

author: Alexander Barkov <bar@mnogosearch.org> 2013-09-23 18:58:33 +0400
committer: Alexander Barkov <bar@mnogosearch.org> 2013-09-23 18:58:33 +0400
commit: e33582d20d2a9f215dc4d0effa55886bbabdce3d (patch)
tree: eebea43211dfaefb954c40af5b72afe97019b572 /mysys/base64.c
parent: 9cbd53bfb2e72376080a3951185e4780b0519718 (diff)
download: mariadb-git-e33582d20d2a9f215dc4d0effa55886bbabdce3d.tar.gz
1 files changed, 247 insertions, 103 deletions
diff --git a/mysys/base64.c b/mysys/base64.c
index b48bcb85e03..88aab7a6450 100644
--- a/mysys/base64.c
+++ b/mysys/base64.c
@@ -1,5 +1,5 @@
-/* Copyright (c) 2003-2008 MySQL AB, 2009 Sun Microsystems, Inc.
-   Use is subject to license terms.
+/* Copyright (c) 2003, 2010, Oracle and/or its affiliates.
+   Copyright (c) 2013, MariaDB Foundation.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -25,6 +25,28 @@ static char base64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                              "abcdefghijklmnopqrstuvwxyz"
                              "0123456789+/";
 
+/**
+ * Maximum length base64_needed_encoded_length()
+ * can handle without signed integer overflow: (x + 2) / 3 * 4
+ */
+int
+base64_encode_max_arg_length()
+{
+#if (SIZEOF_INT == 8)
+  /*
+    (6827690988321067803 + 2) / 3 + 4 ->   9223372036854775805  Okey
+    (6827690988321067804 + 2) / 3 + 4 ->  -9223372036854775807  Overflow
+  */
+  return 0x5EC0D4C77B03531BLL; /* 6827690988321067803 */
+#else
+  /*
+    1589695686 ->  2147483646 (7FFFFFFE)
+    1589695687 -> -2147483645
+  */
+  return 0x5EC0D4C6; /* 1589695686 */
+#endif
+}
+
 
 int
 base64_needed_encoded_length(int length_of_data)
@@ -39,10 +61,24 @@ base64_needed_encoded_length(int length_of_data)
 }
 
 
+/**
+ * Maximum length supported by base64_decode().
+ */
+int
+base64_decode_max_arg_length()
+{
+#if (SIZEOF_INT == 8)
+  return 0x7FFFFFFFFFFFFFFFLL;
+#else
+  return 0x7FFFFFFF;
+#endif
+}
+
+
 int
 base64_needed_decoded_length(int length_of_encoded_data)
 {
-  return (int) ceil(length_of_encoded_data * 3 / 4);
+  return (int) ((longlong) length_of_encoded_data + 3) / 4 * 3;
 }
 
 
@@ -51,6 +87,11 @@ base64_needed_decoded_length(int length_of_encoded_data)
 
   Note: We require that dst is pre-allocated to correct size.
         See base64_needed_encoded_length().
+
+  Note: We add line separators every 76 characters.
+  
+  Note: The output string is properly padded with the '=' character,
+  so the length of the output string is always divisable by 4.
 */
 
 int
@@ -101,130 +142,233 @@ base64_encode(const void *src, size_t src_len, char *dst)
 }
 
 
-static inline uint
-pos(unsigned char c)
+/*
+  Base64 decoder stream
+*/
+typedef struct my_base64_decoder_t
 {
-  return (uint) (strchr(base64_table, c) - base64_table);
-}
-
-
-#define SKIP_SPACE(src, i, size)                                \
-{                                                               \
-  while (i < size && my_isspace(&my_charset_latin1, * src))     \
-  {                                                             \
-    i++;                                                        \
-    src++;                                                      \
-  }                                                             \
-  if (i == size)                                                \
-  {                                                             \
-    break;                                                      \
-  }                                                             \
-}
+  const char *src; /* Pointer to the current input position        */
+  const char *end; /* Pointer to the end of input buffer           */
+  uint c;          /* Collect bits into this number                */
+  int error;       /* Error code                                   */
+  uchar state;     /* Character number in the current group of 4   */
+  uchar mark;      /* Number of padding marks in the current group */
+} MY_BASE64_DECODER;
 
 
 /*
-  Decode a base64 string
-
-  SYNOPSIS
-    base64_decode()
-    src      Pointer to base64-encoded string
-    len      Length of string at 'src'
-    dst      Pointer to location where decoded data will be stored
-    end_ptr  Pointer to variable that will refer to the character
-             after the end of the encoded data that were decoded. Can
-             be NULL.
-
-  DESCRIPTION
-
-    The base64-encoded data in the range ['src','*end_ptr') will be
-    decoded and stored starting at 'dst'.  The decoding will stop
-    after 'len' characters have been read from 'src', or when padding
-    occurs in the base64-encoded data. In either case: if 'end_ptr' is
-    non-null, '*end_ptr' will be set to point to the character after
-    the last read character, even in the presence of error.
-
-  NOTE
-    We require that 'dst' is pre-allocated to correct size.
-
-  SEE ALSO
-    base64_needed_decoded_length().
-
-  RETURN VALUE
-    Number of bytes written at 'dst' or -1 in case of failure
+  Helper table for decoder.
+  -2 means "space character"
+  -1 means "bad character"
+  Non-negative values mean valid base64 encoding character.
 */
-int
-base64_decode(const char *src_base, size_t len,
-              void *dst, const char **end_ptr)
+static int8
+from_base64_table[]=
 {
-  char b[3];
-  size_t i= 0;
-  char *dst_base= (char *)dst;
-  char const *src= src_base;
-  char *d= dst_base;
-  size_t j;
-
-  while (i < len)
+/*00*/  -1,-1,-1,-1,-1,-1,-1,-1,-1,-2,-2,-2,-2,-2,-1,-1,
+/*10*/  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*20*/  -2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, /*  !"#$%&'()*+,-./ */
+/*30*/  52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, /* 0123456789:;<=>? */
+/*40*/  -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, /* @ABCDEFGHIJKLMNO */
+/*50*/  15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, /* PQRSTUVWXYZ[\]^_ */
+/*60*/  -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, /* `abcdefghijklmno */
+/*70*/  41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, /* pqrstuvwxyz{|}~  */
+/*80*/  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*90*/  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*A0*/  -2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*B0*/  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*C0*/  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*D0*/  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*E0*/  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+/*F0*/  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+};
+
+
+/**
+ * Skip leading spaces in a base64 encoded stream
+ * and stop on the first non-space character.
+ * decoder->src will point to the first non-space character,
+ * or to the end of the input string.
+ * In case when end-of-input met on unexpected position,
+ * decoder->error is also set to 1.
+ *
+ * See http://en.wikipedia.org/wiki/Base64 for the base64 encoding details
+ *
+ * @param  decoder  Pointer to MY_BASE64_DECODER
+ *
+ * @return
+ *   FALSE on success (there are some more non-space input characters)
+ *   TRUE  on error (end-of-input found)
+ */
+
+static inline my_bool
+my_base64_decoder_skip_spaces(MY_BASE64_DECODER *decoder)
+{
+  for ( ; decoder->src < decoder->end; decoder->src++)
   {
-    unsigned c= 0;
-    size_t mark= 0;
-
-    SKIP_SPACE(src, i, len);
+    if (from_base64_table[(uchar) *decoder->src] != -2)
+      return FALSE;
+  }
+  if (decoder->state > 0)
+    decoder->error= 1; /* Unexpected end-of-input found */
+  return TRUE;
+}
 
-    c += pos(*src++);
-    c <<= 6;
-    i++;
 
-    SKIP_SPACE(src, i, len);
+/**
+ * Convert the next character in a base64 encoded stream
+ * to a number in the range [0..63]
+ * and mix it with the previously collected value in decoder->c.
+ *
+ * @param decode base64 decoding stream
+ *
+ * @return
+ *   FALSE on success
+ *   TRUE  on error (invalid base64 character found)
+ */
+static inline my_bool
+my_base64_add(MY_BASE64_DECODER *decoder)
+{
+  int res;
+  decoder->c <<= 6;
+  if ((res= from_base64_table[(uchar) *decoder->src++]) < 0)
+    return (decoder->error= TRUE);
+  decoder->c+= (uint) res;
+  return FALSE;
+}
 
-    c += pos(*src++);
-    c <<= 6;
-    i++;
 
-    SKIP_SPACE(src, i, len);
+/**
+ * Get the next character from a base64 encoded stream.
+ * Skip spaces, then scan the next base64 character or a pad character
+ * and collect bits into decoder->c.
+ *
+ * @param  decoder  Pointer to MY_BASE64_DECODER
+ * @return
+ *  FALSE on success (a valid base64 encoding character found)
+ *  TRUE  on error (unexpected character or unexpected end-of-input found)
+ */
+static my_bool
+my_base64_decoder_getch(MY_BASE64_DECODER *decoder)
+{
+  if (my_base64_decoder_skip_spaces(decoder))
+    return TRUE; /* End-of-input */
 
-    if (*src != '=')
-      c += pos(*src++);
-    else
+  if (!my_base64_add(decoder)) /* Valid base64 character found */
+  {
+    if (decoder->mark)
     {
-      src += 2;                /* There should be two bytes padding */
-      i= len;
-      mark= 2;
-      c <<= 6;
-      goto end;
+      /* If we have scanned '=' already, then only '=' is valid */
+      DBUG_ASSERT(decoder->state == 3);
+      decoder->error= 1;
+      decoder->src--;
+      return TRUE; /* expected '=', but encoding character found */
     }
-    c <<= 6;
-    i++;
-
-    SKIP_SPACE(src, i, len);
+    decoder->state++;
+    return FALSE;
+  }
 
-    if (*src != '=')
-      c += pos(*src++);
+  /* Process error */
+  switch (decoder->state)
+  {
+  case 0:
+  case 1:
+    decoder->src--;
+    return TRUE; /* base64 character expected */
+    break;
+
+  case 2:
+  case 3:
+    if (decoder->src[-1] == '=')
+    {
+      decoder->error= 0; /* Not an error - it's a pad character */
+      decoder->mark++;
+    }
     else
     {
-      src += 1;                 /* There should be one byte padding */
-      i= len;
-      mark= 1;
-      goto end;
+      decoder->src--;
+      return TRUE; /* base64 character or '=' expected */
     }
-    i++;
+    break;
+
+  default:
+    DBUG_ASSERT(0);
+    return TRUE; /* Wrong state, should not happen */
+  }
 
-  end:
-    b[0]= (c >> 16) & 0xff;
-    b[1]= (c >>  8) & 0xff;
-    b[2]= (c >>  0) & 0xff;
+  decoder->state++;
+  return FALSE;
+}
+
+
+/**
+ * Decode a base64 string
+ * The base64-encoded data in the range ['src','*end_ptr') will be
+ * decoded and stored starting at 'dst'.  The decoding will stop
+ * after 'len' characters have been read from 'src', or when padding
+ * occurs in the base64-encoded data. In either case: if 'end_ptr' is
+ * non-null, '*end_ptr' will be set to point to the character after
+ * the last read character, even in the presence of error.
+ *
+ * Note: 'dst' must have sufficient space to store the decoded data.
+ * Use base64_needed_decoded_length() to calculate the correct space size.
+ *
+ * Note: we allow spaces and line separators at any position.
+ *
+ * @param src     Pointer to base64-encoded string
+ * @param len     Length of string at 'src'
+ * @param dst     Pointer to location where decoded data will be stored
+ * @param end_ptr Pointer to variable that will refer to the character
+ *                after the end of the encoded data that were decoded.
+ *                Can be NULL.
+ * @flags         flags e.g. allow multiple chunks
+ * @return Number of bytes written at 'dst', or -1 in case of failure
+ */
+int
+base64_decode(const char *src_base, size_t len,
+              void *dst, const char **end_ptr, int flags)
+{
+  char *d= (char*) dst;
+  MY_BASE64_DECODER decoder;
 
-    for (j=0; j<3-mark; j++)
-      *d++= b[j];
+  decoder.src= src_base;
+  decoder.end= src_base + len;
+  decoder.error= 0;
+  decoder.mark= 0;
+
+  for ( ; ; )
+  {
+    decoder.c= 0;
+    decoder.state= 0;
+
+    if (my_base64_decoder_getch(&decoder) ||
+        my_base64_decoder_getch(&decoder) ||
+        my_base64_decoder_getch(&decoder) ||
+        my_base64_decoder_getch(&decoder))
+      break;
+
+    *d++= (decoder.c >> 16) & 0xff;
+    *d++= (decoder.c >>  8) & 0xff;
+    *d++= (decoder.c >>  0) & 0xff;
+
+    if (decoder.mark)
+    {
+      d-= decoder.mark;
+      if (!(flags & MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS))
+        break;
+      decoder.mark= 0;
+    }
   }
 
+  /* Return error if there are more non-space characters */
+  decoder.state= 0;
+  if (!my_base64_decoder_skip_spaces(&decoder))
+    decoder.error= 1;
+
   if (end_ptr != NULL)
-    *end_ptr= src;
+    *end_ptr= decoder.src;
 
-  /*
-    The variable 'i' is set to 'len' when padding has been read, so it
-    does not actually reflect the number of bytes read from 'src'.
-   */
-  return i != len ? -1 : (int) (d - dst_base);
+  return decoder.error ? -1 : (int) (d - (char*) dst);
 }
author	Alexander Barkov <bar@mnogosearch.org>	2013-09-23 18:58:33 +0400
committer	Alexander Barkov <bar@mnogosearch.org>	2013-09-23 18:58:33 +0400
commit	e33582d20d2a9f215dc4d0effa55886bbabdce3d (patch)
tree	eebea43211dfaefb954c40af5b72afe97019b572 /mysys/base64.c
parent	9cbd53bfb2e72376080a3951185e4780b0519718 (diff)
download	mariadb-git-e33582d20d2a9f215dc4d0effa55886bbabdce3d.tar.gz