Diffstat (limited to 'plugin/type_mysql_json/mysql_json.cc')
-rw-r--r--  plugin/type_mysql_json/mysql_json.cc | 515
1 file changed, 515 insertions(+), 0 deletions(-)
diff --git a/plugin/type_mysql_json/mysql_json.cc b/plugin/type_mysql_json/mysql_json.cc
new file mode 100644
index 00000000000..4a75cae3909
--- /dev/null
+++ b/plugin/type_mysql_json/mysql_json.cc
@@ -0,0 +1,515 @@
+/*
+ Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2020 MariaDB Foundation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "mysql_json.h"
+#include "my_global.h"
+#include "compat56.h"
+#include "my_decimal.h"
+#include "sql_time.h"
+
+static void TIME_from_longlong_date_packed(MYSQL_TIME *ltime, longlong tmp)
+{
+ TIME_from_longlong_datetime_packed(ltime, tmp);
+ ltime->time_type= MYSQL_TIMESTAMP_DATE;
+}
+
+
+/*
+  JSON values in MySQL comprise the standard set of JSON values plus a
+  MySQL-specific set. The JSON number type is subdivided into int, uint,
+  double and decimal.
+
+  MySQL also adds four built-in date/time values: date, time, datetime
+  and timestamp. An additional opaque value can store any other MySQL
+  type.
+*/
+
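+/*
+  An illustrative example (not part of the format definition): the
+  document {"key": 123} serializes as a small object whose single value
+  entry holds an inlined JSONB_TYPE_INT16, while {"key": 1.5} stores a
+  JSONB_TYPE_DOUBLE out of line, after the key data.
+*/
+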
+enum JSONB_LITERAL_TYPES {
+ JSONB_NULL_LITERAL= 0x0,
+ JSONB_TRUE_LITERAL= 0x1,
+ JSONB_FALSE_LITERAL= 0x2,
+};
+
+/*
+  The size of the offset and size fields in the small and large storage
+  formats for JSON objects and JSON arrays.
+*/
+static const uchar SMALL_OFFSET_SIZE= 2;
+static const uchar LARGE_OFFSET_SIZE= 4;
+
+/*
+  The size of key entries for objects when using the small or the large
+  storage format. In the small format an entry is 4 bytes (a 2-byte key
+  offset followed by a 2-byte key length). In the large format it is
+  6 bytes (a 4-byte offset followed by a 2-byte length).
+*/
+static const uchar KEY_ENTRY_SIZE_SMALL= (2 + SMALL_OFFSET_SIZE);
+static const uchar KEY_ENTRY_SIZE_LARGE= (2 + LARGE_OFFSET_SIZE);
+
+/*
+ The size of value entries for objects or arrays. When using the
+ small storage format, the entry size is 3 (1 byte for type, 2 bytes
+ for offset). When using the large storage format, it is 5 (1 byte
+ for type, 4 bytes for offset).
+*/
+static const uchar VALUE_ENTRY_SIZE_SMALL= (1 + SMALL_OFFSET_SIZE);
+static const uchar VALUE_ENTRY_SIZE_LARGE= (1 + LARGE_OFFSET_SIZE);
+
+/* The maximum number of nesting levels allowed in a JSON document. */
+static const uchar JSON_DOCUMENT_MAX_DEPTH= 150;
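+
+/*
+  A worked size example (illustrative): in the small format, an object
+  with two members spends 2 + 2 header bytes on the two counts,
+  2 * KEY_ENTRY_SIZE_SMALL = 8 bytes on key entries and
+  2 * VALUE_ENTRY_SIZE_SMALL = 6 bytes on value entries, so the first
+  key's data can start at offset 18 at the earliest.
+*/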
+
+/**
+ Read an offset or size field from a buffer. The offset could be either
+ a two byte unsigned integer or a four byte unsigned integer.
+
+ @param data the buffer to read from
+ @param large tells if the large or small storage format is used; true
+ means read four bytes, false means read two bytes
+*/
+static inline size_t read_offset_or_size(const uchar *data, bool large)
+{
+ return large ? uint4korr(data) : uint2korr(data);
+}
+
+static inline size_t key_size(bool large)
+{
+ return large ? KEY_ENTRY_SIZE_LARGE : KEY_ENTRY_SIZE_SMALL;
+}
+
+static inline size_t value_size(bool large)
+{
+ return large ? VALUE_ENTRY_SIZE_LARGE : VALUE_ENTRY_SIZE_SMALL;
+}
+
+/**
+  Inlined values are a space optimization: the value itself is stored in
+  the value entry instead of an offset to the location where a
+  non-inlined value would be.
+
+  @param[in] type  the type to check
+  @param[in] large tells if the large or small storage format is used
+*/
+static inline bool type_is_stored_inline(JSONB_TYPES type, bool large)
+{
+ return (type == JSONB_TYPE_INT16 ||
+ type == JSONB_TYPE_UINT16 ||
+ type == JSONB_TYPE_LITERAL ||
+ (large && (type == JSONB_TYPE_INT32 ||
+ type == JSONB_TYPE_UINT32)));
+}
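+
+/*
+  For example, in the small format an INT16 value such as 123 occupies
+  the two offset bytes of its value entry directly, whereas an INT32
+  fits inline only in the large format, whose offset field is four
+  bytes wide.
+*/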
+
+/**
+  Read a variable-length integer. A variable-length integer uses the 8th
+  bit of each byte to mark whether more bytes are needed to store the
+  integer; the other 7 bits hold the integer's actual bits.
+
+  @param[in]  data        the buffer to read from
+  @param[in]  data_length the maximum number of bytes to read from data
+  @param[out] length      the decoded length
+  @param[out] num         the number of bytes used to encode the length
+  @return false on success, true on error
+*/
+static inline bool read_variable_length(const uchar *data, size_t data_length,
+ size_t *length, size_t *num)
+{
+ /*
+ It takes five bytes to represent UINT_MAX32, which is the largest
+ supported length, so don't look any further.
+
+ Use data_length as max value to prevent segfault when reading a corrupted
+ JSON document.
+ */
+ const size_t MAX_BYTES= MY_MIN(data_length, 5);
+ size_t len= 0;
+ for (size_t i= 0; i < MAX_BYTES; i++)
+ {
+    /* Get the next 7 bits of the length. Widen before shifting to avoid
+       signed integer overflow on the 5th byte. */
+    len|= static_cast<size_t>(data[i] & 0x7f) << (7 * i);
+ if ((data[i] & 0x80) == 0)
+ {
+ /* The length shouldn't exceed 32 bits. */
+ if (len > UINT_MAX32)
+ return true;
+
+ /* This was the last byte. Return successfully. */
+ *num= i + 1;
+ *length= len;
+ return false;
+ }
+ }
+
+ /* No more available bytes. Return true to signal error. This implies a
+ corrupted JSON document. */
+ return true;
+}
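+
+/*
+  A worked decoding example (illustrative): the byte sequence 0x96 0x01
+  decodes as (0x96 & 0x7f) | (0x01 << 7) = 22 + 128 = 150, consuming
+  two bytes, while a single byte 0x05 decodes to 5.
+*/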
+
+/**
+ JSON formatting in MySQL escapes a few special characters to prevent
+ ambiguity.
+*/
+static bool append_string_json(String *buffer, const uchar *data, size_t len)
+{
+ const uchar *last= data + len;
+ for (; data < last; data++)
+ {
+ const uchar c= *data;
+ switch (c) {
+ case '\\':
+ buffer->append("\\\\");
+ break;
+ case '\n':
+ buffer->append("\\n");
+ break;
+ case '\r':
+ buffer->append("\\r");
+ break;
+ case '"':
+ buffer->append("\\\"");
+ break;
+ case '\b':
+ buffer->append("\\b");
+ break;
+ case '\f':
+ buffer->append("\\f");
+ break;
+ case '\t':
+ buffer->append("\\t");
+ break;
+ default:
+ buffer->append(c);
+ break;
+ }
+ }
+ return false;
+}
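+
+/*
+  For example (illustrative), the raw bytes of the string
+      say "hi"
+  followed by a newline are emitted as
+      say \"hi\"\n
+  so the printed value can be re-parsed unambiguously.
+*/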
+
+/*
+  Print a packed MySQL date/time value. Used for the date/time payloads
+  of the JSONB_TYPE_OPAQUE type.
+*/
+static bool print_mysql_datetime_value(String *buffer, enum_field_types type,
+ const uchar *data, size_t len)
+{
+ if (len < 8)
+ return true;
+
+ MYSQL_TIME t;
+ switch (type)
+ {
+ case MYSQL_TYPE_TIME:
+ TIME_from_longlong_time_packed(&t, sint8korr(data));
+ break;
+ case MYSQL_TYPE_DATE:
+ TIME_from_longlong_date_packed(&t, sint8korr(data));
+ break;
+ case MYSQL_TYPE_DATETIME:
+ case MYSQL_TYPE_TIMESTAMP:
+ TIME_from_longlong_datetime_packed(&t, sint8korr(data));
+ break;
+ default:
+ DBUG_ASSERT(0);
+ return true;
+ }
+ /* Wrap all datetime strings within double quotes. */
+ buffer->append('\"');
+ buffer->reserve(MAX_DATE_STRING_REP_LENGTH);
+ buffer->length(buffer->length() +
+ my_TIME_to_str(&t, const_cast<char *>(buffer->end()), 6));
+ buffer->append('\"');
+ return false;
+}
+
+static bool parse_mysql_scalar(String *buffer, size_t value_json_type,
+ const uchar *data, size_t len)
+{
+ switch (value_json_type) {
+ case JSONB_TYPE_LITERAL:
+ {
+ if (len < 1)
+ return true;
+ switch (static_cast<JSONB_LITERAL_TYPES>(*data)) {
+ case JSONB_NULL_LITERAL:
+ return buffer->append("null");
+ case JSONB_TRUE_LITERAL:
+ return buffer->append("true");
+ case JSONB_FALSE_LITERAL:
+ return buffer->append("false");
+ default: /* Invalid literal constant, malformed JSON. */
+ return true;
+ }
+ }
+ case JSONB_TYPE_INT16:
+ return len < 2 || buffer->append_longlong(sint2korr(data));
+ case JSONB_TYPE_INT32:
+ return len < 4 || buffer->append_longlong(sint4korr(data));
+ case JSONB_TYPE_INT64:
+ return len < 8 || buffer->append_longlong(sint8korr(data));
+ case JSONB_TYPE_UINT16:
+ return len < 2 || buffer->append_ulonglong(uint2korr(data));
+ case JSONB_TYPE_UINT32:
+ return len < 4 || buffer->append_ulonglong(uint4korr(data));
+ case JSONB_TYPE_UINT64:
+ return len < 8 || buffer->append_ulonglong(uint8korr(data));
+ case JSONB_TYPE_DOUBLE:
+ if (len < 8)
+ return true;
+ buffer->reserve(FLOATING_POINT_BUFFER, 2 * FLOATING_POINT_BUFFER);
+ buffer->qs_append(reinterpret_cast<const double *>(data));
+ return false;
+ case JSONB_TYPE_STRING:
+ {
+ size_t string_length, store_bytes;
+
+ return read_variable_length(data, len, &string_length, &store_bytes) ||
+ len < store_bytes + string_length ||
+ buffer->append('"') ||
+ append_string_json(buffer, data + store_bytes, string_length) ||
+ buffer->append('"');
+ }
+ case JSONB_TYPE_OPAQUE:
+ {
+    /* The first payload byte maps directly to enum_field_types. */
+    if (len < 1)
+      return true;
+    const uchar type_value= *data;
+    const enum_field_types field_type= static_cast<enum_field_types>(type_value);
+
+    size_t UNINIT_VAR(blob_length), length_bytes;
+    const uchar *blob_start;
+
+    /* The blob length follows the type byte, so only len - 1 bytes remain. */
+    if (read_variable_length(data + 1, len - 1, &blob_length, &length_bytes) ||
+        len < 1 + length_bytes + blob_length)
+      return true;
+    blob_start= data + 1 + length_bytes;
+
+ switch (field_type) {
+ case MYSQL_TYPE_TIME:
+ case MYSQL_TYPE_DATE:
+ case MYSQL_TYPE_DATETIME:
+ case MYSQL_TYPE_TIMESTAMP:
+ return print_mysql_datetime_value(buffer, field_type,
+ blob_start, blob_length);
+ case MYSQL_TYPE_NEWDECIMAL:
+ {
+ /* Expect at least two bytes, which contain precision and scale. */
+ if (blob_length < 2)
+ return true;
+
+ const int precision= blob_start[0];
+ const int scale= blob_start[1];
+
+ my_decimal d;
+
+ /* The decimal value is encoded after the two prec/scale bytes. */
+ const size_t dec_size= my_decimal_get_binary_size(precision, scale);
+ if (dec_size != blob_length - 2 ||
+ binary2my_decimal(E_DEC_ERROR,
+ reinterpret_cast<const uchar *>(blob_start + 2),
+ &d, precision, scale) != E_DEC_OK)
+ return true;
+
+ if (d.to_string_native(buffer, 0, 0, ' ', E_DEC_ERROR) != E_DEC_OK)
+ return true;
+ return false;
+ }
+ default:
+ {
+ /* Any other MySQL type is presented as a base64 encoded string. */
+ if (buffer->append("\"base64:type") ||
+ buffer->append_longlong(field_type) ||
+ buffer->append(':'))
+ return true;
+
+ const size_t needed= my_base64_needed_encoded_length(
+ static_cast<int>(blob_length));
+ if (buffer->reserve(needed) ||
+ my_base64_encode(blob_start, blob_length,
+ const_cast<char*>(buffer->end())))
+ return true;
+      /* needed includes my_base64_encode's null terminator; subtract 1 so
+         the terminator is overwritten by the closing quote. */
+      DBUG_ASSERT(*(buffer->end() + needed - 1) == '\0');
+      buffer->length(buffer->length() + needed - 1);
+ return buffer->append('"');
+ }
+ }
+ }
+ default:
+ return true;
+ }
+}
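+
+/*
+  Example (illustrative): a JSONB_TYPE_UINT16 payload of 0xff 0xff is
+  read little-endian by uint2korr() and printed as 65535, and a
+  JSONB_TYPE_LITERAL payload byte 0x01 (JSONB_TRUE_LITERAL) prints as
+  true.
+*/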
+
+
+/**
+  Read a value from a JSON Object or Array, given its position. This
+  function handles both inlined values and values stored at an offset.
+
+  @param[out] buffer            Where to print the result.
+  @param[in]  data              The raw binary data of the Object or Array.
+  @param[in]  len               The length of the binary data.
+  @param[in]  value_type_offset Where the type of the value is stored.
+  @param[in]  large             true if the large storage format is used.
+  @param[in]  depth             How deep the value is nested in the document.
+*/
+static bool parse_mysql_scalar_or_value(String *buffer, const uchar *data,
+ size_t len, size_t value_type_offset,
+ bool large, size_t depth)
+{
+ /* Get the type of the value stored at the key. */
+ const JSONB_TYPES value_type=
+ static_cast<JSONB_TYPES>(data[value_type_offset]);
+
+  if (type_is_stored_inline(value_type, large))
+  {
+    const size_t value_start= value_type_offset + 1;
+    if (value_start >= len ||
+        parse_mysql_scalar(buffer, value_type, data + value_start,
+                           len - value_start))
+      return true;
+  }
+  else
+  {
+    /* The offset to where the value is stored is relative to the start
+       of the Object / Array. Reject offsets that point outside the
+       buffer; len - value_start would otherwise wrap around. */
+    const size_t value_start= read_offset_or_size(
+        data + value_type_offset + 1, large);
+    if (value_start >= len ||
+        parse_mysql_json_value(buffer, value_type, data + value_start,
+                               len - value_start, depth))
+      return true;
+  }
+ return false;
+}
+
+static bool parse_array_or_object(String *buffer, const uchar *data, size_t len,
+ bool handle_as_object, bool large,
+ size_t depth)
+{
+ if (++depth > JSON_DOCUMENT_MAX_DEPTH)
+ return true;
+
+  /*
+    Make sure the document is long enough to contain the two header
+    fields: the number of elements or members, and the number of bytes.
+    That is, the length must be at least twice the offset size.
+  */
+  const size_t offset_size= large ? LARGE_OFFSET_SIZE : SMALL_OFFSET_SIZE;
+  if (len < 2 * offset_size)
+    return true;
+
+ /*
+ Every JSON Object or Array contains two numbers in the header:
+ - The number of elements in the Object / Array (Keys)
+ - The total number of bytes occupied by the JSON Object / Array, including
+ the two numbers in the header.
+ Depending on the Object / Array type (small / large) the numbers are stored
+ in 2 bytes or 4 bytes each.
+ */
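+  /*
+    For instance (illustrative), a small-format object with 2 members
+    occupying 30 bytes in total starts with the four header bytes
+    0x02 0x00 0x1e 0x00, both counts stored little-endian.
+  */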
+ const size_t element_count= read_offset_or_size(data, large);
+ const size_t bytes= read_offset_or_size(data + offset_size, large);
+
+  /* The value can't have more bytes than what's available in the buffer. */
+  if (bytes > len)
+    return true;
+
+  /* The entry table must also fit within the declared size. */
+  const size_t entry_table_size=
+    element_count * (value_size(large) +
+                     (handle_as_object ? key_size(large) : 0));
+  if (bytes < 2 * offset_size + entry_table_size)
+    return true;
+
+ if (buffer->append(handle_as_object ? '{' : '['))
+ return true;
+
+ for (size_t i= 0; i < element_count; i++)
+ {
+ if (handle_as_object)
+ {
+ /*
+ The JSON Object is stored as a header part and a data part.
+ Header consists of:
+ - two length fields,
+ - an array of pointers to keys.
+ - an array of tuples (type, pointer to values)
+ * For certain types, the pointer to values is replaced by the actual
+ value. (see type_is_stored_inline)
+ Data consists of:
+ - All Key data, in order
+ - All Value data, in order
+ */
+ const size_t key_offset= 2 * offset_size + i * key_size(large);
+ const size_t key_start= read_offset_or_size(data + key_offset, large);
+      /* Key lengths are always stored in 2 bytes (as if large == false). */
+      const size_t key_len= read_offset_or_size(
+          data + key_offset + offset_size, false);
+      /* The key data must lie within the declared size of the value. */
+      if (key_start + key_len > bytes)
+        return true;
+
+ const size_t value_type_offset=(2 * offset_size +
+ element_count * key_size(large) +
+ i * value_size(large));
+
+ /* First print the key. */
+ if (buffer->append('"') ||
+ append_string_json(buffer, data + key_start, key_len) ||
+ buffer->append("\": "))
+ {
+ return true;
+ }
+
+ /* Then print the value. */
+ if (parse_mysql_scalar_or_value(buffer, data, bytes, value_type_offset,
+ large, depth))
+ return true;
+ }
+ else
+ {
+ /*
+ Arrays do not have the keys vector and its associated data.
+ We jump straight to reading values.
+ */
+ const size_t value_type_offset= 2 * offset_size + value_size(large) * i;
+
+ if (parse_mysql_scalar_or_value(buffer, data, bytes, value_type_offset,
+ large, depth))
+ return true;
+ }
+
+ if (i != element_count - 1 && buffer->append(", "))
+ return true;
+ }
+
+ return buffer->append(handle_as_object ? '}' : ']');
+}
+
+/**
+  Dispatch on the type byte: objects and arrays, each of which can use
+  the small or the large representation, are parsed recursively; any
+  other type is parsed as a scalar.
+
+  @param[out] buffer Where to print the result.
+  @param[in]  type   Type of the value {object, array, scalar}.
+  @param[in]  data   Raw data to parse.
+  @param[in]  len    Length of data.
+  @param[in]  depth  Current nesting depth, checked against
+                     JSON_DOCUMENT_MAX_DEPTH.
+*/
+bool parse_mysql_json_value(String *buffer, JSONB_TYPES type, const uchar *data,
+ size_t len, size_t depth)
+{
+ const bool IS_OBJECT=true, IS_LARGE=true;
+ switch (type) {
+ case JSONB_TYPE_SMALL_OBJECT:
+ return parse_array_or_object(buffer, data, len, IS_OBJECT, !IS_LARGE, depth);
+ case JSONB_TYPE_LARGE_OBJECT:
+ return parse_array_or_object(buffer, data, len, IS_OBJECT, IS_LARGE, depth);
+ case JSONB_TYPE_SMALL_ARRAY:
+ return parse_array_or_object(buffer, data, len, !IS_OBJECT, !IS_LARGE, depth);
+ case JSONB_TYPE_LARGE_ARRAY:
+ return parse_array_or_object(buffer, data, len, !IS_OBJECT, IS_LARGE, depth);
+ default:
+ return parse_mysql_scalar(buffer, type, data, len);
+ }
+}
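+
+/*
+  A typical call site (an illustrative sketch, not code from this file)
+  strips the leading type byte of the stored value and passes the rest:
+
+    parse_mysql_json_value(&buffer, static_cast<JSONB_TYPES>(raw[0]),
+                           raw + 1, raw_len - 1, 0);
+*/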