author     Eric Herman <eric@freesa.org>                     2021-07-02 08:18:16 +0200
committer  Vicențiu-Marian Ciorbaru <vicentiu@mariadb.org>   2021-07-21 16:32:11 +0300
commit     fcde34176456ee567bccb90812f4dceff674c027 (patch)
tree       19fb0781057aab814ae5aa1de7dde658e9fc0b4e
parent     105e4148bfe863a1580ad0bd7442bc661a4e02fd (diff)
download   mariadb-git-fcde34176456ee567bccb90812f4dceff674c027.tar.gz
MDEV-16375 Function to normalize a json value
This patch implements the JSON_NORMALIZE SQL function.

Co-authored-by: Vicențiu Ciorbaru <vicentiu@mariadb.org>
-rw-r--r--  mysql-test/main/json_normalize.result  77
-rw-r--r--  mysql-test/main/json_normalize.test    58
-rw-r--r--  sql/item_create.cc                     23
-rw-r--r--  sql/item_jsonfunc.cc                   45
-rw-r--r--  sql/item_jsonfunc.h                    18
5 files changed, 221 insertions, 0 deletions
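
A quick illustration of the normalization rules exercised by the new tests: object keys are sorted, insignificant whitespace is dropped, and numbers are printed in a canonical exponential form. This example is not part of the patch; the expected output is inferred from the json_normalize.result file below:

select json_normalize('{ "b" : 2, "a" : 1 }');
json_normalize('{ "b" : 2, "a" : 1 }')
{"a":1.0E0,"b":2.0E0}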
diff --git a/mysql-test/main/json_normalize.result b/mysql-test/main/json_normalize.result
new file mode 100644
index 00000000000..f88cadad189
--- /dev/null
+++ b/mysql-test/main/json_normalize.result
@@ -0,0 +1,77 @@
+set names utf8;
+create table t1 (json json);
+show create table t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `json` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL CHECK (json_valid(`json`))
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+insert into t1 values
+('{ }'),
+('[ ]'),
+('{ "foo" : "bar" }'),
+('{ "foo" : "bar", "baz" : "whatever" }'),
+('[ 1.2, 0.0, "text", 0, null, true, false ]'),
+('[ "string", { "key" : "val", "a" : "b", "c" : [ 10, 9, 8, "seven", 11 ] }]'),
+('{ "ăț€": "val1", "âț€":"val2" }');
+select json, json_normalize(json) from t1
+order by json;
+json json_normalize(json)
+[ ] []
+[ "string", { "key" : "val", "a" : "b", "c" : [ 10, 9, 8, "seven", 11 ] }] ["string",{"a":"b","c":[1.0E1,9.0E0,8.0E0,"seven",1.1E1],"key":"val"}]
+[ 1.2, 0.0, "text", 0, null, true, false ] [1.2E0,0.0E0,"text",0.0E0,null,true,false]
+{ } {}
+{ "foo" : "bar" } {"foo":"bar"}
+{ "foo" : "bar", "baz" : "whatever" } {"baz":"whatever","foo":"bar"}
+{ "ăț€": "val1", "âț€":"val2" } {"âț€":"val2","ăț€":"val1"}
+create view v1 as (select json, json_normalize(json) norm_json from t1);
+show create view v1;
+View Create View character_set_client collation_connection
+v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS (select `t1`.`json` AS `json`,json_normalize(`t1`.`json`) AS `norm_json` from `t1`) utf8mb3 utf8mb3_general_ci
+select * from v1
+order by json;
+json norm_json
+[ ] []
+[ "string", { "key" : "val", "a" : "b", "c" : [ 10, 9, 8, "seven", 11 ] }] ["string",{"a":"b","c":[1.0E1,9.0E0,8.0E0,"seven",1.1E1],"key":"val"}]
+[ 1.2, 0.0, "text", 0, null, true, false ] [1.2E0,0.0E0,"text",0.0E0,null,true,false]
+{ } {}
+{ "foo" : "bar" } {"foo":"bar"}
+{ "foo" : "bar", "baz" : "whatever" } {"baz":"whatever","foo":"bar"}
+{ "ăț€": "val1", "âț€":"val2" } {"âț€":"val2","ăț€":"val1"}
+select json_normalize(NULL);
+json_normalize(NULL)
+NULL
+select json_normalize('{ "invalid": "no_close"');
+json_normalize('{ "invalid": "no_close"')
+NULL
+drop table t1;
+drop view v1;
+create table t1 (text varchar(200) character set 'latin1');
+insert into t1 values (unhex('22E522'));
+create table t2 (text varchar(200) character set 'utf8mb4');
+insert into t2 SELECT * FROM t1;
+select t1.text, hex(t1.text) from t1;
+text hex(t1.text)
+"å" 22E522
+select t2.text, hex(t2.text) from t2;
+text hex(t2.text)
+"å" 22C3A522
+select t1.text
+, t2.text
+, replace(json_normalize(t1.text), unhex('C3A5'), unhex('C385'))
+, replace(json_normalize(t2.text), unhex('C3A5'), unhex('C385'))
+, hex(replace(json_normalize(t1.text), unhex('C3A5'), unhex('C385')))
+, hex(replace(json_normalize(t2.text), unhex('C3A5'), unhex('C385')))
+from t1, t2;
+text text replace(json_normalize(t1.text), unhex('C3A5'), unhex('C385')) replace(json_normalize(t2.text), unhex('C3A5'), unhex('C385')) hex(replace(json_normalize(t1.text), unhex('C3A5'), unhex('C385'))) hex(replace(json_normalize(t2.text), unhex('C3A5'), unhex('C385')))
+"å" "å" "Å" "Å" 22C38522 22C38522
+drop table t1;
+drop table t2;
+create table t1 (text varchar(1));
+insert into t1 values ('0');
+select concat_ws(' ', t1.text, t1.text) from t1;
+concat_ws(' ', t1.text, t1.text)
+0 0
+select concat_ws(' ', json_normalize(t1.text), json_normalize(t1.text)) from t1;
+concat_ws(' ', json_normalize(t1.text), json_normalize(t1.text))
+0.0E0 0.0E0
+drop table t1;
diff --git a/mysql-test/main/json_normalize.test b/mysql-test/main/json_normalize.test
new file mode 100644
index 00000000000..29faa514cca
--- /dev/null
+++ b/mysql-test/main/json_normalize.test
@@ -0,0 +1,58 @@
+set names utf8;
+
+create table t1 (json json);
+show create table t1;
+
+
+insert into t1 values
+('{ }'),
+('[ ]'),
+('{ "foo" : "bar" }'),
+('{ "foo" : "bar", "baz" : "whatever" }'),
+('[ 1.2, 0.0, "text", 0, null, true, false ]'),
+('[ "string", { "key" : "val", "a" : "b", "c" : [ 10, 9, 8, "seven", 11 ] }]'),
+('{ "ăț€": "val1", "âț€":"val2" }');
+
+select json, json_normalize(json) from t1
+order by json;
+
+
+create view v1 as (select json, json_normalize(json) norm_json from t1);
+show create view v1;
+
+select * from v1
+order by json;
+
+select json_normalize(NULL);
+select json_normalize('{ "invalid": "no_close"');
+
+drop table t1;
+drop view v1;
+
+create table t1 (text varchar(200) character set 'latin1');
+insert into t1 values (unhex('22E522'));
+
+create table t2 (text varchar(200) character set 'utf8mb4');
+insert into t2 SELECT * FROM t1;
+
+select t1.text, hex(t1.text) from t1;
+select t2.text, hex(t2.text) from t2;
+
+select t1.text
+ , t2.text
+ , replace(json_normalize(t1.text), unhex('C3A5'), unhex('C385'))
+ , replace(json_normalize(t2.text), unhex('C3A5'), unhex('C385'))
+ , hex(replace(json_normalize(t1.text), unhex('C3A5'), unhex('C385')))
+ , hex(replace(json_normalize(t2.text), unhex('C3A5'), unhex('C385')))
+from t1, t2;
+
+drop table t1;
+drop table t2;
+
+create table t1 (text varchar(1));
+insert into t1 values ('0');
+
+select concat_ws(' ', t1.text, t1.text) from t1;
+select concat_ws(' ', json_normalize(t1.text), json_normalize(t1.text)) from t1;
+
+drop table t1;
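
A natural use of a canonical form is comparing JSON documents for semantic equality. The sketch below is not part of the patch's test suite; its expected result follows from the normalization rules demonstrated above (sorted keys, canonical numbers, a utf8mb4_bin result), so two equivalent documents compare equal regardless of key order or whitespace:

select json_normalize('{"a": 1, "b": [2, 3]}')
     = json_normalize('{ "b" : [ 2, 3 ], "a" : 1 }') as same_doc;
same_doc
1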
diff --git a/sql/item_create.cc b/sql/item_create.cc
index c80cf7b03d7..79f558ee844 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -902,6 +902,19 @@ protected:
};
+class Create_func_json_normalize : public Create_func_arg1
+{
+public:
+ virtual Item *create_1_arg(THD *thd, Item *arg1);
+
+ static Create_func_json_normalize s_singleton;
+
+protected:
+ Create_func_json_normalize() {}
+ virtual ~Create_func_json_normalize() {}
+};
+
+
class Create_func_json_exists : public Create_func_arg2
{
public:
@@ -3596,6 +3609,15 @@ Create_func_isnull::create_1_arg(THD *thd, Item *arg1)
return new (thd->mem_root) Item_func_isnull(thd, arg1);
}
+Create_func_json_normalize Create_func_json_normalize::s_singleton;
+
+Item*
+Create_func_json_normalize::create_1_arg(THD *thd, Item *arg1)
+{
+ status_var_increment(thd->status_var.feature_json);
+ return new (thd->mem_root) Item_func_json_normalize(thd, arg1);
+}
+
Create_func_json_exists Create_func_json_exists::s_singleton;
@@ -5561,6 +5583,7 @@ Native_func_registry func_array[] =
{ { STRING_WITH_LEN("JSON_MERGE") }, BUILDER(Create_func_json_merge)},
{ { STRING_WITH_LEN("JSON_MERGE_PATCH") }, BUILDER(Create_func_json_merge_patch)},
{ { STRING_WITH_LEN("JSON_MERGE_PRESERVE") }, BUILDER(Create_func_json_merge)},
+ { { STRING_WITH_LEN("JSON_NORMALIZE") }, BUILDER(Create_func_json_normalize)},
{ { STRING_WITH_LEN("JSON_QUERY") }, BUILDER(Create_func_json_query)},
{ { STRING_WITH_LEN("JSON_QUOTE") }, BUILDER(Create_func_json_quote)},
{ { STRING_WITH_LEN("JSON_OBJECT") }, BUILDER(Create_func_json_object)},
diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc
index 5ecde3842ef..32043b8f6a7 100644
--- a/sql/item_jsonfunc.cc
+++ b/sql/item_jsonfunc.cc
@@ -3885,3 +3885,48 @@ String* Item_func_json_objectagg::val_str(String* str)
}
+String *Item_func_json_normalize::val_str(String *buf)
+{
+ String tmp;
+ String *raw_json= args[0]->val_str(&tmp);
+
+ DYNAMIC_STRING normalized_json;
+ if (init_dynamic_string(&normalized_json, NULL, 0, 0))
+ {
+ null_value= 1;
+ return NULL;
+ }
+
+ null_value= args[0]->null_value;
+ if (null_value)
+ goto end;
+
+ if (json_normalize(&normalized_json,
+ raw_json->c_ptr(), raw_json->length(),
+ raw_json->charset()))
+ {
+ null_value= 1;
+ goto end;
+ }
+
+ buf->length(0);
+ if (buf->append(normalized_json.str, normalized_json.length))
+ {
+ null_value= 1;
+ goto end;
+ }
+
+end:
+ dynstr_free(&normalized_json);
+ return null_value ? NULL : buf;
+}
+
+
+bool Item_func_json_normalize::fix_length_and_dec()
+{
+ collation.set(&my_charset_utf8mb4_bin);
+ /* 0 becomes 0.0E0, thus one character becomes 5 chars */
+ fix_char_length_ulonglong((ulonglong) args[0]->max_char_length() * 5);
+ set_maybe_null();
+ return FALSE;
+}
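
The factor of 5 in fix_length_and_dec() above reflects the worst-case growth of a single-character input: the one-byte literal 0 normalizes to the five-character string 0.0E0, which is exactly what the varchar(1) test case exercises. A plain illustration (expected output taken from json_normalize.result; char_length() is only added here for emphasis):

select json_normalize('0'), char_length(json_normalize('0'));
json_normalize('0')  char_length(json_normalize('0'))
0.0E0  5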
diff --git a/sql/item_jsonfunc.h b/sql/item_jsonfunc.h
index a249349c8f8..04d06a0ebf7 100644
--- a/sql/item_jsonfunc.h
+++ b/sql/item_jsonfunc.h
@@ -443,6 +443,24 @@ public:
{ return get_item_copy<Item_func_json_merge_patch>(thd, this); }
};
+
+class Item_func_json_normalize: public Item_json_func
+{
+public:
+ Item_func_json_normalize(THD *thd, Item *a):
+ Item_json_func(thd, a) {}
+ String *val_str(String *) override;
+ LEX_CSTRING func_name_cstring() const override
+ {
+ static LEX_CSTRING name= {STRING_WITH_LEN("json_normalize") };
+ return name;
+ }
+ bool fix_length_and_dec() override;
+ Item *get_copy(THD *thd) override
+ { return get_item_copy<Item_func_json_normalize>(thd, this); }
+};
+
+
class Item_func_json_length: public Item_long_func
{
bool check_arguments() const override