diff options
author | Eric Herman <eric@freesa.org> | 2021-07-02 07:59:56 +0200 |
---|---|---|
committer | Vicențiu-Marian Ciorbaru <vicentiu@mariadb.org> | 2021-07-21 16:32:11 +0300 |
commit | 105e4148bfe863a1580ad0bd7442bc661a4e02fd (patch) | |
tree | 0ce02426d37d7828891ca8875891622070dab233 /unittest | |
parent | 7b587fcbe74e46a5b902e096b6c86d0476560489 (diff) | |
download | mariadb-git-105e4148bfe863a1580ad0bd7442bc661a4e02fd.tar.gz |
Add json_normalize function to json_lib
This patch implements a library for normalizing json documents.
The algorithm is:
* Recursively sort json keys according to utf8mb4_bin collation.
* Normalize numbers to be of the form [-]<digit>.<frac>E<exponent>
* All unneeded whitespace and line endings are removed.
* Arrays are not sorted.
Co-authored-by: Vicențiu Ciorbaru <vicentiu@mariadb.org>
Diffstat (limited to 'unittest')
-rw-r--r-- | unittest/json_lib/CMakeLists.txt | 2 | ||||
-rw-r--r-- | unittest/json_lib/json_normalize-t.c | 280 |
2 files changed, 281 insertions, 1 deletion
diff --git a/unittest/json_lib/CMakeLists.txt b/unittest/json_lib/CMakeLists.txt index 1b2a89b28cd..157f37291e9 100644 --- a/unittest/json_lib/CMakeLists.txt +++ b/unittest/json_lib/CMakeLists.txt @@ -19,4 +19,4 @@ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/unittest/mytap) # -MY_ADD_TESTS(json_lib LINK_LIBRARIES strings dbug) +MY_ADD_TESTS(json_lib json_normalize LINK_LIBRARIES strings dbug) diff --git a/unittest/json_lib/json_normalize-t.c b/unittest/json_lib/json_normalize-t.c new file mode 100644 index 00000000000..f72e90175e2 --- /dev/null +++ b/unittest/json_lib/json_normalize-t.c @@ -0,0 +1,280 @@ +/* Copyright (c) 2021 Eric Herman and MariaDB Foundation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "my_config.h" +#include "config.h" +#include <tap.h> +#include <my_global.h> +#include <json_lib.h> + + +static void +check_json_normalize(const char *in, const char *expected) +{ + int err; + DYNAMIC_STRING result; + + CHARSET_INFO *cs= &my_charset_utf8mb4_general_ci; + + init_dynamic_string(&result, NULL, 0, 0); + + err= json_normalize(&result, in, strlen(in), cs); + + ok(err == 0, "normalize err?"); + + ok(strcmp(expected, result.str) == 0, + "expected '%s' from '%s' but was '%s'", + expected, in, result.str); + + dynstr_free(&result); +} + + +static void +test_json_normalize_invalid(void) +{ + DYNAMIC_STRING result; + + CHARSET_INFO *cs= &my_charset_utf8mb4_general_ci; + + init_dynamic_string(&result, NULL, 0, 0); + ok(json_normalize(&result, STRING_WITH_LEN(""), cs) != 0, + "expected normalized error"); + dynstr_free(&result); + + init_dynamic_string(&result, NULL, 0, 0); + ok(json_normalize(&result, STRING_WITH_LEN("["), cs) != 0, + "expected normalized error"); + dynstr_free(&result); + + init_dynamic_string(&result, NULL, 0, 0); + ok(json_normalize(&result, STRING_WITH_LEN("}"), cs) != 0, + "expected normalized error"); + dynstr_free(&result); + + init_dynamic_string(&result, NULL, 0, 0); + ok(json_normalize(&result, NULL, 0, cs) != 0, + "expected normalized error"); + dynstr_free(&result); +} + + +static void +test_json_normalize_single_kv(void) +{ + const char *in= "" + "{\n" + " \"foo\": \"value\"\n" + "}\n"; + + const char *expected= "{\"foo\":\"value\"}"; + check_json_normalize(in, expected); +} + + +static void +test_json_normalize_multi_kv(void) +{ + const char *in= "" + "{\n" + " \"bar\": \"baz\",\n" + " \"foo\": \"value\"\n" + "}\n"; + + const char *expected= "{\"bar\":\"baz\",\"foo\":\"value\"}"; + check_json_normalize(in, 
expected); +} + + +static void +test_json_normalize_array(void) +{ + const char *in= "[ \"a\", \"b\", true, false, null ]"; + const char *expected= "[\"a\",\"b\",true,false,null]"; + check_json_normalize(in, expected); +} + + +static void +test_json_normalize_values(void) +{ + check_json_normalize("\"foo\"", "\"foo\""); + check_json_normalize("true", "true"); + check_json_normalize("false", "false"); + check_json_normalize("null", "null"); + check_json_normalize("\"\"", "\"\""); + check_json_normalize("{}", "{}"); + check_json_normalize("[]", "[]"); + check_json_normalize("5", "5.0E0"); + check_json_normalize("5.1", "5.1E0"); + check_json_normalize("-5.1", "-5.1E0"); + check_json_normalize("12345.67890", "1.23456789E4"); + check_json_normalize("2.99792458e8", "2.99792458E8"); + check_json_normalize("6.02214076e23", "6.02214076E23"); + check_json_normalize("6.62607015e-34", "6.62607015E-34"); + check_json_normalize("-6.62607015e-34", "-6.62607015E-34"); +} + + +static void +test_json_normalize_nested_objects(void) +{ + const char *in = "" + "{\n" + " \"wiz\": {\n" + "\t\t\"bang\": \"a\",\n\t\t\"alpha\": false\n\t},\n" + " \"foo\": {\"value\":true}\n" + "}"; + + const char *expected= "{\"foo\":{\"value\":true}," + "\"wiz\":{\"alpha\":false,\"bang\":\"a\"}}"; + check_json_normalize(in, expected); +} + + +static void +test_json_normalize_nested_arrays(void) +{ + const char *in = "" + "[\n" + " \"wiz\",\n" + " [\"bang\", \t\t\"alpha\"\t]\n" + "]"; + + const char *expected= "[\"wiz\",[\"bang\",\"alpha\"]]"; + check_json_normalize(in, expected); +} + + +static void +test_json_normalize_nested_deep(void) +{ + const char *in = "" + "{\n" + " \"foo\": \"value\",\n" + " \"wiz\": [true, false, {\n" + "\t\t\"bang\": \"a\",\n\t\t\"alpha\": 12345.67890\n\t},\n \"string\",\n" + "\t{ \"b\": \"one\", \"a\": \"two\", \"c\": \"three\"}, false,\n" + "\t\t[-1.20, \"w\", \"x\"]],\n" + " \"bar\": \"value2\"\n" + "}\n"; + + const char *expected= "" + "{" + "\"bar\":\"value2\"," + 
"\"foo\":\"value\"," + "\"wiz\":[" + "true,false," + "{\"alpha\":1.23456789E4,\"bang\":\"a\"}," + "\"string\"," + "{\"a\":\"two\",\"b\":\"one\",\"c\":\"three\"}," + "false," + "[-1.2E0,\"w\",\"x\"]" + "]" + "}"; + check_json_normalize(in, expected); +} + + +/* a "friend" function */ +int +json_normalize_number(DYNAMIC_STRING *out, const char *str, size_t str_len); + + +static void +test_json_normalize_non_utf8(void) +{ + int err; + const char utf8[]= { 0x22, 0xC3, 0x8A, 0x22, 0x00 }; + const char latin[] = { 0x22, 0xCA, 0x22, 0x00 }; + DYNAMIC_STRING result; + CHARSET_INFO *cs_utf8= &my_charset_utf8mb4_bin; + CHARSET_INFO *cs_latin= &my_charset_latin1; + + init_dynamic_string(&result, NULL, 0, 0); + err= json_normalize(&result, utf8, strlen(utf8), cs_utf8); + ok(err == 0, "normalize err?"); + ok((strcmp(utf8, result.str) == 0), "utf8 round trip"); + dynstr_free(&result); + + init_dynamic_string(&result, NULL, 0, 0); + err= json_normalize(&result, latin, strlen(latin), cs_latin); + ok(err == 0, "normalize err?"); + ok((strcmp(utf8, result.str) == 0), "latin to utf8 round trip"); + dynstr_free(&result); +} + + +void +check_number_normalize(const char *in, const char *expected) +{ + int err; + DYNAMIC_STRING buf; + + init_dynamic_string(&buf, NULL, 0, 0); + + err= json_normalize_number(&buf, in, strlen(in)); + ok(err == 0, "normalize number err?"); + + ok(strcmp(buf.str, expected) == 0, + "expected: %s\n" + " but was: %s\n" + " from: %s\n", + expected, + buf.str, + in); + + dynstr_free(&buf); +} + + +int +main(void) +{ + plan(88); + diag("Testing json_normalization."); + + check_number_normalize("0", "0.0E0"); + check_number_normalize("-0.0", "0.0E0"); + check_number_normalize("0E100", "0.0E0"); + check_number_normalize("0.000000E100", "0.0E0"); + check_number_normalize("-0E100", "0.0E0"); + check_number_normalize("-0.000E100", "0.0E0"); + check_number_normalize("1", "1.0E0"); + check_number_normalize("-1", "-1.0E0"); + check_number_normalize("36", "3.6E1"); + 
check_number_normalize("37.000", "3.7E1"); + check_number_normalize("3.000", "3.0E0"); + check_number_normalize("0.00012345", "1.2345E-4"); + check_number_normalize("32.14e234", "3.214E235"); + check_number_normalize("0.00357e-23", "3.57E-26"); + check_number_normalize("0.00357e23", "3.57E20"); + check_number_normalize("123.456e10", "1.23456E12"); + check_number_normalize("123.456e-9", "1.23456E-7"); + check_number_normalize("0000123.456000000e-9", "1.23456E-7"); + check_number_normalize("0000123.456000000e+9", "1.23456E11"); + + test_json_normalize_invalid(); + test_json_normalize_values(); + test_json_normalize_single_kv(); + test_json_normalize_multi_kv(); + test_json_normalize_array(); + test_json_normalize_nested_objects(); + test_json_normalize_nested_arrays(); + test_json_normalize_nested_deep(); + test_json_normalize_non_utf8(); + + return exit_status(); +} |