diff options
author | Alexey Botchkov <holyfoot@askmonty.org> | 2016-10-19 14:10:03 +0400 |
---|---|---|
committer | Alexey Botchkov <holyfoot@askmonty.org> | 2016-10-19 14:10:03 +0400 |
commit | 27025221fe2ea17aa737ad2ad31011407c00dcc9 (patch) | |
tree | 0f5352944d26aceb427320756c90bde3b00f13a3 | |
parent | 8303aded294ce905bbc513e7ee42623d5f1fdb50 (diff) | |
download | mariadb-git-27025221fe2ea17aa737ad2ad31011407c00dcc9.tar.gz |
MDEV-9143 JSON_xxx functions.
strings/json_lib.c added as a JSON library.
SQL functions added in sql/item_jsonfunc.h/cc
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | include/CMakeLists.txt | 1 | ||||
-rw-r--r-- | include/json_lib.h | 356 | ||||
-rw-r--r-- | libmysqld/CMakeLists.txt | 3 | ||||
-rw-r--r-- | mysql-test/r/func_json.result | 123 | ||||
-rw-r--r-- | mysql-test/r/parser_precedence.result | 2 | ||||
-rw-r--r-- | mysql-test/t/func_json.test | 54 | ||||
-rw-r--r-- | sql/CMakeLists.txt | 2 | ||||
-rw-r--r-- | sql/item.h | 15 | ||||
-rw-r--r-- | sql/item_create.cc | 474 | ||||
-rw-r--r-- | sql/item_jsonfunc.cc | 1109 | ||||
-rw-r--r-- | sql/item_jsonfunc.h | 304 | ||||
-rw-r--r-- | sql/item_xmlfunc.cc | 21 | ||||
-rw-r--r-- | sql/item_xmlfunc.h | 5 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 4 | ||||
-rw-r--r-- | storage/connect/mysql-test/connect/disabled.def | 2 | ||||
-rw-r--r-- | strings/CMakeLists.txt | 2 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 19 | ||||
-rw-r--r-- | strings/json_lib.c | 1553 | ||||
-rw-r--r-- | unittest/json_lib/CMakeLists.txt | 23 | ||||
-rw-r--r-- | unittest/json_lib/json_lib-t.c | 186 |
21 files changed, 4225 insertions, 34 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index e461141acc8..4272699cb4e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -356,6 +356,7 @@ IF(WITH_UNIT_TESTS) ADD_SUBDIRECTORY(unittest/examples) ADD_SUBDIRECTORY(unittest/mysys) ADD_SUBDIRECTORY(unittest/my_decimal) + ADD_SUBDIRECTORY(unittest/json_lib) IF(NOT WITHOUT_SERVER) ADD_SUBDIRECTORY(unittest/sql) ENDIF() diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 8ebd72b9c58..e6de8515fdc 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -61,6 +61,7 @@ SET(HEADERS my_compiler.h handler_state.h handler_ername.h + json_lib.h ) INSTALL(FILES ${HEADERS} DESTINATION ${INSTALL_INCLUDEDIR} COMPONENT Development) diff --git a/include/json_lib.h b/include/json_lib.h new file mode 100644 index 00000000000..d9e3bb7c624 --- /dev/null +++ b/include/json_lib.h @@ -0,0 +1,356 @@ +#ifndef JSON_LIB_INCLUDED +#define JSON_LIB_INCLUDED + +#ifdef __cplusplus +extern "C" { +#endif + +#define JSON_DEPTH_LIMIT 32 + +/* + When error happens, the c_next of the JSON engine contains the + character that caused the error, and the c_str is the position + in string where the error occurs. +*/ +enum json_errors { + JE_BAD_CHR= -1, /* Invalid character, charset handler cannot read it. */ + + JE_NOT_JSON_CHR= -2, /* Character met not used in JSON. */ + /* ASCII 00-08 for instance. */ + + JE_EOS= -3, /* Unexpected end of string. */ + + JE_SYN= -4, /* The next character breaks the JSON syntax. */ + + JE_STRING_CONST= -5, /* Character disallowed in string constant. */ + + JE_ESCAPING= -6, /* Error in the escaping. */ + + JE_DEPTH= -7, /* The limit on the JSON depth was overrun. */ +}; + + +typedef struct st_json_string_t +{ + const uchar *c_str; /* Current position in JSON string */ + const uchar *str_end; /* The end on the string. */ + my_wc_t c_next; /* UNICODE of the last read character */ + int error; /* error code. */ + + CHARSET_INFO *cs; /* Character set of the JSON string. 
*/ + + my_charset_conv_mb_wc wc; /* UNICODE conversion function. */ + /* It's taken out of the cs just to speed calls. */ +} json_string_t; + + +void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs); +void json_string_set_str(json_string_t *s, + const uchar *str, const uchar *end); +#define json_next_char(j) \ + (j)->wc((j)->cs, &(j)->c_next, (j)->c_str, (j)->str_end) +#define json_eos(j) ((j)->c_str >= (j)->str_end) +/* + read_string_const_chr() reads the next character of the string constant + and saves it to the js->c_next. + It takes into account possible escapings, so if for instance + the string is '\b', the read_string_const_chr() sets 8. +*/ +int json_read_string_const_chr(json_string_t *js); + + +/* + Various JSON-related operations expect JSON path as a parameter. + The path is a string like this "$.keyA[2].*" + The path itself is a number of steps specifying either a key or a position + in an array. Some of them can be wildcards. + So the representation of the JSON path is the json_path_t class + containing an array of json_path_step_t objects. +*/ + + +enum json_path_step_types +{ + JSON_PATH_KEY=0, + JSON_PATH_ARRAY=1 +}; + + +typedef struct st_json_path_step_t +{ + enum json_path_step_types type; /* The type of the step - KEY or ARRAY */ + int wild; /* If the step is a wildcard */ + const uchar *key; /* Pointer to the beginning of the key. */ + const uchar *key_end; /* Pointer to the end of the key. */ + uint n_item; /* Item number in an array. No meaning for the key step. */ +} json_path_step_t; + + +typedef struct st_json_path_t +{ + json_string_t s; /* The string to be parsed. */ + json_path_step_t steps[JSON_DEPTH_LIMIT]; /* Steps of the path. */ + json_path_step_t *last_step; /* Points to the last step. 
*/ + + int mode_strict; /* TRUE if the path specified as 'strict' */ +} json_path_t; + + +int json_path_setup(json_path_t *p, + CHARSET_INFO *i_cs, const uchar *str, const uchar *end); + + +/* + The set of functions and structures below provides interface + to the JSON text parser. + Running the parser normally goes like this: + + json_engine_t j_eng; // structure keeps parser's data + json_scan_start(j_eng) // begin the parsing + + do + { + // The parser has read next piece of JSON + // and set fields of j_eng structure accordingly. + // So let's see what we have: + switch (j_eng.state) + { + case JST_KEY: + // Handle key name. See the json_read_keyname_chr() + // Probably compare it with the keyname we're looking for + case JST_VALUE: + // Handle value. It is either value of the key or an array item. + // see the json_read_value() + case JST_OBJ_START: + // parser found an object (the '{' in JSON) + case JST_OBJ_END: + // parser found the end of the object (the '}' in JSON) + case JST_ARRAY_START: + // parser found an array (the '[' in JSON) + case JST_ARRAY_END: + // parser found the end of the array (the ']' in JSON) + + }; + } while (json_scan_next() == 0); // parse next structure + + + if (j_eng.s.error) // we need to check why the loop ended. + // Did we get to the end of JSON, or came upon error. + { + signal_error_in_JSON() + } + + + Parts of JSON can be quickly skipped. If we are not interested + in a particular key, we can just skip it with json_skip_key() call. + Similarly json_skip_level() goes right to the end of an object + or an array. +*/ + + +/* These are JSON parser states that user can expect and handle. 
*/ +enum json_states { + JST_VALUE, /* value found */ + JST_KEY, /* key found */ + JST_OBJ_START, /* object */ + JST_OBJ_END, /* object ended */ + JST_ARRAY_START, /* array */ + JST_ARRAY_END, /* array ended */ + NR_JSON_USER_STATES +}; + + +enum json_value_types +{ + JSON_VALUE_OBJECT=0, + JSON_VALUE_ARRAY=1, + JSON_VALUE_STRING, + JSON_VALUE_NUMBER, + JSON_VALUE_TRUE, + JSON_VALUE_FALSE, + JSON_VALUE_NULL +}; + + +typedef struct st_json_engine_t +{ + json_string_t s; /* String to parse. */ + int sav_c_len; /* Length of the current character. + Can be more than 1 for multibyte charsets */ + + int state; /* The state of the parser. One of 'enum json_states'. + It tells us what construction of JSON we've just read. */ + + /* These values are only set after the json_read_value() call. */ + enum json_value_types value_type; /* type of the value.*/ + const uchar *value; /* Points to the value. */ + const uchar *value_begin;/* Points to where the value starts in the JSON. */ + + /* + In most cases the 'value' and 'value_begin' are equal. + They only differ if the value is a string constants. Then 'value_begin' + points to the starting quotation mark, while the 'value' - to + the first character of the string. + */ + + const uchar *value_end; /* Points to the next character after the value. */ + int value_len; /* The length of the value. Does not count quotations for */ + /* string constants. */ + + int stack[JSON_DEPTH_LIMIT]; /* Keeps the stack of nested JSON structures. */ + int *stack_p; /* The 'stack' pointer. */ +} json_engine_t; + + +int json_scan_start(json_engine_t *je, + CHARSET_INFO *i_cs, const uchar *str, const uchar *end); +int json_scan_next(json_engine_t *j); + + +/* + json_read_keyname_chr() function assists parsing the name of an JSON key. + It only can be called when the json_engine is in JST_KEY. + The json_read_keyname_chr() reads one character of the name of the key, + and puts it in j_eng.s.next_c. 
+ Typical usage is like this: + + if (j_eng.state == JST_KEY) + { + while (json_read_keyname_chr(&j) == 0) + { + //handle next character i.e. match it against the pattern + } + } +*/ + +int json_read_keyname_chr(json_engine_t *j); + + +/* + json_read_value() function parses the JSON value syntax, + so that we can handle the value of a key or an array item. + It only returns meaningful result when the engine is in + the JST_VALUE state. + + Typical usage is like this: + + if (j_eng.state == JST_VALUE) + { + json_read_value(&j_eng); + switch(j_eng.value_type) + { + case JSON_VALUE_STRING: + // get the string + str= j_eng.value; + str_length= j_eng.value_len; + case JSON_VALUE_NUMBER: + // get the number + ... etc + } +*/ +int json_read_value(json_engine_t *j); + + +/* + json_skip_key() makes parser skip the content of the current + JSON key quickly. + It can be called only when the json_engine state is JST_KEY. + Typical usage is: + + if (j_eng.state == JST_KEY) + { + if (key_does_not_match(j_eng)) + json_skip_key(j_eng); + } +*/ + +int json_skip_key(json_engine_t *j); + + +/* + json_skip_level() makes parser quickly skip the JSON content + to the end of the current object or array. + It is used when we're not interested in the rest of an array + or the rest of the keys of an object. +*/ +int json_skip_level(json_engine_t *j); + + +#define json_skip_array_item json_skip_key + +/* + Checks if the current value is of scalar type - + not an OBJECT nor ARRAY. +*/ +#define json_value_scalar(je) ((je)->value_type > JSON_VALUE_ARRAY) + +/* + Look for the JSON PATH in the json string. + Function can be called several times with same JSON/PATH to + find multiple matches. + On the first call, the json_engine_t parameter should be + initialized with the JSON string, and the json_path_t with the JSON path + appropriately. The 'p_cur_step' should point at the first + step of the path. + The 'array_counters' is the array of JSON_DEPTH_LIMIT size. 
+ It stores the array counters of the parsed JSON. + If function returns 0, it means it found the match. The position of + the match is je->s.c_str. Then we can call the json_find_path() + with same engine/path/p_cur_step to get the next match. + Non-zero return means no matches found. + Check je->s.error to see if there was an error in JSON. +*/ +int json_find_path(json_engine_t *je, + json_path_t *p, json_path_step_t **p_cur_step, + uint *array_counters); + + +typedef struct st_json_find_paths_t +{ + uint n_paths; + json_path_t *paths; + uint cur_depth; + uint *path_depths; + uint array_counters[JSON_DEPTH_LIMIT]; +} json_find_paths_t; + + +int json_find_paths_first(json_engine_t *je, json_find_paths_t *state, + uint n_paths, json_path_t *paths, uint *path_depths); +int json_find_paths_next(json_engine_t *je, json_find_paths_t *state); + + +/* + Converst JSON string constant into ordinary string constant + which can involve unpacking json escapes and changing character set. + Returns negative integer in the case of an error, + the length of the result otherwise. +*/ +int json_unescape(CHARSET_INFO *json_cs, + const uchar *json_str, const uchar *json_end, + CHARSET_INFO *res_cs, + uchar *res, uchar *res_end); + +/* + Converst ordinary string constant into JSON string constant. + which can involve appropriate escaping and changing character set. + Returns negative integer in the case of an error, + the length of the result otherwise. +*/ +int json_escape(CHARSET_INFO *str_cs, const uchar *str, const uchar *str_end, + CHARSET_INFO *json_cs, uchar *json, uchar *json_end); + + +/* + Appends the ASCII string to the json with the charset conversion. 
+*/ +int json_append_ascii(CHARSET_INFO *json_cs, + uchar *json, uchar *json_end, + const uchar *ascii, const uchar *ascii_end); + +#ifdef __cplusplus +} +#endif + +#endif /* JSON_LIB_INCLUDED */ + diff --git a/libmysqld/CMakeLists.txt b/libmysqld/CMakeLists.txt index b64348cdd70..62aab628ed2 100644 --- a/libmysqld/CMakeLists.txt +++ b/libmysqld/CMakeLists.txt @@ -50,7 +50,8 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc ../sql/item.cc ../sql/item_create.cc ../sql/item_func.cc ../sql/item_geofunc.cc ../sql/item_row.cc ../sql/item_strfunc.cc ../sql/item_subselect.cc ../sql/item_sum.cc ../sql/item_timefunc.cc - ../sql/item_xmlfunc.cc ../sql/key.cc ../sql/lock.cc ../sql/log.cc + ../sql/item_xmlfunc.cc ../sql/item_jsonfunc.cc + ../sql/key.cc ../sql/lock.cc ../sql/log.cc ../sql/log_event.cc ../sql/mf_iocache.cc ../sql/my_decimal.cc ../sql/net_serv.cc ../sql/opt_range.cc ../sql/opt_sum.cc ../sql/parse_file.cc ../sql/procedure.cc ../sql/protocol.cc diff --git a/mysql-test/r/func_json.result b/mysql-test/r/func_json.result new file mode 100644 index 00000000000..1a47dd79fe8 --- /dev/null +++ b/mysql-test/r/func_json.result @@ -0,0 +1,123 @@ +select json_valid('[1, 2]'); +json_valid('[1, 2]') +1 +select json_valid('"string"}'); +json_valid('"string"}') +0 +select json_valid('{"key1":1, "key2":[2,3]}'); +json_valid('{"key1":1, "key2":[2,3]}') +1 +select json_valid('[false, true, null]'); +json_valid('[false, true, null]') +1 +select json_value('{"key1":123}', '$.key2'); +json_value('{"key1":123}', '$.key2') +NULL +select json_value('{"key1":123}', '$.key1'); +json_value('{"key1":123}', '$.key1') +123 +select json_value('{"key1":[1,2,3]}', '$.key1'); +json_value('{"key1":[1,2,3]}', '$.key1') +NULL +select json_value('{"key1": [1,2,3], "key1":123}', '$.key1'); +json_value('{"key1": [1,2,3], "key1":123}', '$.key1') +123 +select json_query('{"key1":{"a":1, "b":[1,2]}}', '$.key2'); +json_query('{"key1":{"a":1, "b":[1,2]}}', '$.key2') +NULL +select 
json_query('{"key1":{"a":1, "b":[1,2]}}', '$.key1'); +json_query('{"key1":{"a":1, "b":[1,2]}}', '$.key1') +{"a":1, "b":[1,2]} +select json_query('{"key1": 1}', '$.key1'); +json_query('{"key1": 1}', '$.key1') +NULL +select json_query('{"key1":123, "key1": [1,2,3]}', '$.key1'); +json_query('{"key1":123, "key1": [1,2,3]}', '$.key1') +[1,2,3] +select json_array(1); +json_array(1) +[1] +select json_array(1, "text", false, null); +json_array(1, "text", false, null) +[1, "text", false, null] +select json_array_append('["a", "b"]', '$', FALSE); +json_array_append('["a", "b"]', '$', FALSE) +["a", "b", false] +select json_array_append('{"k1":1, "k2":["a", "b"]}', '$.k2', 2); +json_array_append('{"k1":1, "k2":["a", "b"]}', '$.k2', 2) +{"k1":1, "k2":["a", "b", 2]} +select json_contains('{"k1":123, "k2":345}', '123', '$.k1'); +json_contains('{"k1":123, "k2":345}', '123', '$.k1') +1 +select json_contains('"you"', '"you"'); +json_contains('"you"', '"you"') +1 +select json_contains('"youth"', '"you"'); +json_contains('"youth"', '"you"') +0 +select json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[1]"); +json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[1]") +1 +select json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[10]"); +json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[10]") +0 +select json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.ma"); +json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.ma") +0 +select json_contains_path('{"key1":1, "key2":[2,3]}', "one", "$.key1"); +json_contains_path('{"key1":1, "key2":[2,3]}', "one", "$.key1") +1 +select json_contains_path('{"key1":1, "key2":[2,3]}', "one", "$.key1", "$.ma"); +json_contains_path('{"key1":1, "key2":[2,3]}', "one", "$.key1", "$.ma") +1 +select json_contains_path('{"key1":1, "key2":[2,3]}', "aLl", "$.key1", "$.ma"); +json_contains_path('{"key1":1, "key2":[2,3]}', "aLl", "$.key1", "$.ma") +0 +select json_contains_path('{"key1":1, "key2":[2,3]}', "aLl", "$.key1", 
"$.key2"); +json_contains_path('{"key1":1, "key2":[2,3]}', "aLl", "$.key1", "$.key2") +1 +select json_extract('{"key1":"asd", "key2":[2,3]}', "$.key1"); +json_extract('{"key1":"asd", "key2":[2,3]}', "$.key1") +asd +select json_extract('{"key1":"asd", "key2":[2,3]}', "$.keyX", "$.keyY"); +json_extract('{"key1":"asd", "key2":[2,3]}', "$.keyX", "$.keyY") +NULL +select json_extract('{"key1":"asd", "key2":[2,3]}', "$.key1", "$.key2"); +json_extract('{"key1":"asd", "key2":[2,3]}', "$.key1", "$.key2") +["asd", [2,3]] +select json_extract('{"key1":5, "key2":[2,3]}', "$.key1", "$.key2"); +json_extract('{"key1":5, "key2":[2,3]}', "$.key1", "$.key2") +[5, [2,3]] +select json_extract('{"key0":true, "key1":"qwe"}', "$.key1"); +json_extract('{"key0":true, "key1":"qwe"}', "$.key1") +qwe +select json_object("ki", 1, "mi", "ya"); +json_object("ki", 1, "mi", "ya") +{"ki": 1, "mi": "ya"} +select json_exists('{"key1":"xxxx", "key2":[1, 2, 3]}', "$.key2"); +json_exists('{"key1":"xxxx", "key2":[1, 2, 3]}', "$.key2") +1 +select json_exists('{"key1":"xxxx", "key2":[1, 2, 3]}', "$.key2[1]"); +json_exists('{"key1":"xxxx", "key2":[1, 2, 3]}', "$.key2[1]") +1 +select json_exists('{"key1":"xxxx", "key2":[1, 2, 3]}', "$.key2[10]"); +json_exists('{"key1":"xxxx", "key2":[1, 2, 3]}', "$.key2[10]") +0 +select json_quote('"string"'); +json_quote('"string"') +\"string\" +select json_merge('string', 123); +json_merge('string', 123) +["string", 123] +select json_type('{"k1":123, "k2":345}'); +json_type('{"k1":123, "k2":345}') +OBJECT +select json_type('[123, "k2", 345]'); +json_type('[123, "k2", 345]') +ARRAY +select json_type("true"); +json_type("true") +BOOLEAN +select json_type('123'); +json_type('123') +NUMBER diff --git a/mysql-test/r/parser_precedence.result b/mysql-test/r/parser_precedence.result index 4330c8a2045..5225dad1668 100644 --- a/mysql-test/r/parser_precedence.result +++ b/mysql-test/r/parser_precedence.result @@ -369,7 +369,7 @@ select (NOT FALSE) AND FALSE, NOT (FALSE AND FALSE), NOT 
FALSE AND FALSE; 0 1 0 Testing that NOT is associative select NOT NOT TRUE, NOT NOT NOT FALSE; -NOT NOT TRUE NOT NOT NOT FALSE +TRUE NOT NOT NOT FALSE 1 1 Testing that IS has precedence over NOT select (NOT NULL) IS TRUE, NOT (NULL IS TRUE), NOT NULL IS TRUE; diff --git a/mysql-test/t/func_json.test b/mysql-test/t/func_json.test new file mode 100644 index 00000000000..3990ff24fc4 --- /dev/null +++ b/mysql-test/t/func_json.test @@ -0,0 +1,54 @@ +select json_valid('[1, 2]'); +select json_valid('"string"}'); +select json_valid('{"key1":1, "key2":[2,3]}'); +select json_valid('[false, true, null]'); + +select json_value('{"key1":123}', '$.key2'); +select json_value('{"key1":123}', '$.key1'); +select json_value('{"key1":[1,2,3]}', '$.key1'); +select json_value('{"key1": [1,2,3], "key1":123}', '$.key1'); + +select json_query('{"key1":{"a":1, "b":[1,2]}}', '$.key2'); +select json_query('{"key1":{"a":1, "b":[1,2]}}', '$.key1'); +select json_query('{"key1": 1}', '$.key1'); +select json_query('{"key1":123, "key1": [1,2,3]}', '$.key1'); + +select json_array(1); +select json_array(1, "text", false, null); + +select json_array_append('["a", "b"]', '$', FALSE); +select json_array_append('{"k1":1, "k2":["a", "b"]}', '$.k2', 2); + +select json_contains('{"k1":123, "k2":345}', '123', '$.k1'); +select json_contains('"you"', '"you"'); +select json_contains('"youth"', '"you"'); + +select json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[1]"); +select json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.key2[10]"); +select json_contains_path('{"key1":1, "key2":[2,3]}', "oNE", "$.ma"); +select json_contains_path('{"key1":1, "key2":[2,3]}', "one", "$.key1"); +select json_contains_path('{"key1":1, "key2":[2,3]}', "one", "$.key1", "$.ma"); +select json_contains_path('{"key1":1, "key2":[2,3]}', "aLl", "$.key1", "$.ma"); +select json_contains_path('{"key1":1, "key2":[2,3]}', "aLl", "$.key1", "$.key2"); + +select json_extract('{"key1":"asd", "key2":[2,3]}', "$.key1"); +select 
json_extract('{"key1":"asd", "key2":[2,3]}', "$.keyX", "$.keyY"); +select json_extract('{"key1":"asd", "key2":[2,3]}', "$.key1", "$.key2"); +select json_extract('{"key1":5, "key2":[2,3]}', "$.key1", "$.key2"); +select json_extract('{"key0":true, "key1":"qwe"}', "$.key1"); + +select json_object("ki", 1, "mi", "ya"); + +select json_exists('{"key1":"xxxx", "key2":[1, 2, 3]}', "$.key2"); +select json_exists('{"key1":"xxxx", "key2":[1, 2, 3]}', "$.key2[1]"); +select json_exists('{"key1":"xxxx", "key2":[1, 2, 3]}', "$.key2[10]"); + +select json_quote('"string"'); + +select json_merge('string', 123); + +select json_type('{"k1":123, "k2":345}'); +select json_type('[123, "k2", 345]'); +select json_type("true"); +select json_type('123'); + diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 28072375bbc..d011b1a07d2 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -136,7 +136,7 @@ SET (SQL_SOURCE opt_table_elimination.cc sql_expression_cache.cc gcalc_slicescan.cc gcalc_tools.cc threadpool_common.cc ../sql-common/mysql_async.c - my_apc.cc my_apc.h mf_iocache_encr.cc + my_apc.cc my_apc.h mf_iocache_encr.cc item_jsonfunc.cc my_json_writer.cc my_json_writer.h rpl_gtid.cc rpl_parallel.cc sql_type.cc sql_type.h diff --git a/sql/item.h b/sql/item.h index 28a14454c0b..76442351d0d 100644 --- a/sql/item.h +++ b/sql/item.h @@ -2997,6 +2997,20 @@ public: }; +/* + We sometimes need to distinguish a number from a boolean: + a[1] and a[true] are different things in XPath. + Also in JSON boolean values should be treated differently. 
+*/ +class Item_bool :public Item_int +{ +public: + Item_bool(THD *thd, const char *str_arg, longlong i): + Item_int(thd, str_arg, i, 1) {} + bool is_bool_type() { return true; } +}; + + class Item_uint :public Item_int { public: @@ -4750,6 +4764,7 @@ public: #include "item_timefunc.h" #include "item_subselect.h" #include "item_xmlfunc.h" +#include "item_jsonfunc.h" #include "item_create.h" #endif diff --git a/sql/item_create.cc b/sql/item_create.cc index 7f0d4144177..cf6f24eddb7 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -1708,6 +1708,201 @@ protected: #endif +class Create_func_json_exists : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_json_exists s_singleton; + +protected: + Create_func_json_exists() {} + virtual ~Create_func_json_exists() {} +}; + + +class Create_func_json_valid : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_json_valid s_singleton; + +protected: + Create_func_json_valid() {} + virtual ~Create_func_json_valid() {} +}; + + +class Create_func_json_type : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_json_type s_singleton; + +protected: + Create_func_json_type() {} + virtual ~Create_func_json_type() {} +}; + + +class Create_func_json_depth : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_json_depth s_singleton; + +protected: + Create_func_json_depth() {} + virtual ~Create_func_json_depth() {} +}; + + +class Create_func_json_value : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD *thd, Item *arg1, Item *arg2); + + static Create_func_json_value s_singleton; + +protected: + Create_func_json_value() {} + virtual ~Create_func_json_value() {} +}; + + +class Create_func_json_query : public Create_func_arg2 +{ +public: + virtual Item *create_2_arg(THD 
*thd, Item *arg1, Item *arg2); + + static Create_func_json_query s_singleton; + +protected: + Create_func_json_query() {} + virtual ~Create_func_json_query() {} +}; + + +class Create_func_json_contains: public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, LEX_STRING name, List<Item> *item_list); + + static Create_func_json_contains s_singleton; + +protected: + Create_func_json_contains() {} + virtual ~Create_func_json_contains() {} +}; + + +class Create_func_json_contains_path : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, LEX_STRING name, List<Item> *item_list); + + static Create_func_json_contains_path s_singleton; + +protected: + Create_func_json_contains_path() {} + virtual ~Create_func_json_contains_path() {} +}; + + +class Create_func_json_extract : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, LEX_STRING name, List<Item> *item_list); + + static Create_func_json_extract s_singleton; + +protected: + Create_func_json_extract() {} + virtual ~Create_func_json_extract() {} +}; + + +class Create_func_json_array : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, LEX_STRING name, List<Item> *item_list); + + static Create_func_json_array s_singleton; + +protected: + Create_func_json_array() {} + virtual ~Create_func_json_array() {} +}; + + +class Create_func_json_array_append : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, LEX_STRING name, List<Item> *item_list); + + static Create_func_json_array_append s_singleton; + +protected: + Create_func_json_array_append() {} + virtual ~Create_func_json_array_append() {} +}; + + +class Create_func_json_object : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, LEX_STRING name, List<Item> *item_list); + + static Create_func_json_object s_singleton; + +protected: + Create_func_json_object() {} + virtual ~Create_func_json_object() {} +}; + + +class 
Create_func_json_length : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, LEX_STRING name, List<Item> *item_list); + + static Create_func_json_length s_singleton; + +protected: + Create_func_json_length() {} + virtual ~Create_func_json_length() {} +}; + + +class Create_func_json_merge : public Create_native_func +{ +public: + virtual Item *create_native(THD *thd, LEX_STRING name, List<Item> *item_list); + + static Create_func_json_merge s_singleton; + +protected: + Create_func_json_merge() {} + virtual ~Create_func_json_merge() {} +}; + + +class Create_func_json_quote : public Create_func_arg1 +{ +public: + virtual Item *create_1_arg(THD *thd, Item *arg1); + + static Create_func_json_quote s_singleton; + +protected: + Create_func_json_quote() {} + virtual ~Create_func_json_quote() {} +}; + + class Create_func_last_day : public Create_func_arg1 { public: @@ -4572,6 +4767,69 @@ Create_func_issimple::create_1_arg(THD *thd, Item *arg1) #endif +Create_func_json_exists Create_func_json_exists::s_singleton; + +Item* +Create_func_json_exists::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_json_exists(thd, arg1, arg2); +} + + +Create_func_json_valid Create_func_json_valid::s_singleton; + +Item* +Create_func_json_valid::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_json_valid(thd, arg1); +} + + +Create_func_json_type Create_func_json_type::s_singleton; + +Item* +Create_func_json_type::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_json_type(thd, arg1); +} + + +Create_func_json_depth Create_func_json_depth::s_singleton; + +Item* +Create_func_json_depth::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_json_depth(thd, arg1); +} + + +Create_func_json_value Create_func_json_value::s_singleton; + +Item* +Create_func_json_value::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) 
Item_func_json_value(thd, arg1, arg2); +} + + +Create_func_json_query Create_func_json_query::s_singleton; + +Item* +Create_func_json_query::create_2_arg(THD *thd, Item *arg1, Item *arg2) +{ + return new (thd->mem_root) Item_func_json_query(thd, arg1, arg2); +} + + +Create_func_json_quote Create_func_json_quote::s_singleton; + +Item* +Create_func_json_quote::create_1_arg(THD *thd, Item *arg1) +{ + return new (thd->mem_root) Item_func_json_quote(thd, arg1); +} + + Create_func_last_day Create_func_last_day::s_singleton; Item* @@ -4581,6 +4839,207 @@ Create_func_last_day::create_1_arg(THD *thd, Item *arg1) } +Create_func_json_array Create_func_json_array::s_singleton; + +Item* +Create_func_json_array::create_native(THD *thd, LEX_STRING name, + List<Item> *item_list) +{ + Item *func; + + if (item_list != NULL) + { + func= new (thd->mem_root) Item_func_json_array(thd, *item_list); + } + else + { + func= new (thd->mem_root) Item_func_json_array(thd); + } + + return func; +} + + +Create_func_json_array_append Create_func_json_array_append::s_singleton; + +Item* +Create_func_json_array_append::create_native(THD *thd, LEX_STRING name, + List<Item> *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (arg_count < 3 || (arg_count & 1) == 0 /*is even*/) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name.str); + } + else + { + func= new (thd->mem_root) Item_func_json_array_append(thd, *item_list); + } + + return func; +} + + +Create_func_json_object Create_func_json_object::s_singleton; + +Item* +Create_func_json_object::create_native(THD *thd, LEX_STRING name, + List<Item> *item_list) +{ + Item *func; + int arg_count; + + if (item_list != NULL) + { + arg_count= item_list->elements; + if ((arg_count & 1) != 0 /*is odd*/) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name.str); + func= NULL; + } + else + { + func= new (thd->mem_root) Item_func_json_object(thd, *item_list); + } + } + 
else + { + arg_count= 0; + func= new (thd->mem_root) Item_func_json_object(thd); + } + + return func; +} + + +Create_func_json_length Create_func_json_length::s_singleton; + +Item* +Create_func_json_length::create_native(THD *thd, LEX_STRING name, + List<Item> *item_list) +{ + Item *func; + int arg_count; + + if (item_list == NULL || + (arg_count= item_list->elements) == 0) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name.str); + func= NULL; + } + else + { + func= new (thd->mem_root) Item_func_json_length(thd, *item_list); + } + + return func; +} + + +Create_func_json_merge Create_func_json_merge::s_singleton; + +Item* +Create_func_json_merge::create_native(THD *thd, LEX_STRING name, + List<Item> *item_list) +{ + Item *func; + int arg_count; + + if (item_list == NULL || + (arg_count= item_list->elements) == 0) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name.str); + func= NULL; + } + else + { + func= new (thd->mem_root) Item_func_json_merge(thd, *item_list); + } + + return func; +} + + +Create_func_json_contains Create_func_json_contains::s_singleton; + +Item* +Create_func_json_contains::create_native(THD *thd, LEX_STRING name, + List<Item> *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (arg_count < 2 /* json_doc, val, [path]...*/) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name.str); + } + else + { + func= new (thd->mem_root) Item_func_json_contains(thd, *item_list); + } + + return func; +} + + +Create_func_json_contains_path Create_func_json_contains_path::s_singleton; + +Item* +Create_func_json_contains_path::create_native(THD *thd, LEX_STRING name, + List<Item> *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (arg_count < 3 /* json_doc, one_or_all, path, [path]...*/) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name.str); + } + else + { + func= new 
(thd->mem_root) Item_func_json_contains_path(thd, *item_list); + } + + return func; +} + + +Create_func_json_extract Create_func_json_extract::s_singleton; + +Item* +Create_func_json_extract::create_native(THD *thd, LEX_STRING name, + List<Item> *item_list) +{ + Item *func= NULL; + int arg_count= 0; + + if (item_list != NULL) + arg_count= item_list->elements; + + if (arg_count < 2 /* json_doc, path, [path]...*/) + { + my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name.str); + } + else + { + func= new (thd->mem_root) Item_func_json_extract(thd, *item_list); + } + + return func; +} + + Create_func_last_insert_id Create_func_last_insert_id::s_singleton; Item* @@ -5852,6 +6311,21 @@ static Native_func_registry func_array[] = { { C_STRING_WITH_LEN("ISSIMPLE") }, GEOM_BUILDER(Create_func_issimple)}, { { C_STRING_WITH_LEN("IS_FREE_LOCK") }, BUILDER(Create_func_is_free_lock)}, { { C_STRING_WITH_LEN("IS_USED_LOCK") }, BUILDER(Create_func_is_used_lock)}, + { { C_STRING_WITH_LEN("JSON_ARRAY") }, BUILDER(Create_func_json_array)}, + { { C_STRING_WITH_LEN("JSON_ARRAY_APPEND") }, BUILDER(Create_func_json_array_append)}, + { { C_STRING_WITH_LEN("JSON_CONTAINS") }, BUILDER(Create_func_json_contains)}, + { { C_STRING_WITH_LEN("JSON_CONTAINS_PATH") }, BUILDER(Create_func_json_contains_path)}, + { { C_STRING_WITH_LEN("JSON_DEPTH") }, BUILDER(Create_func_json_depth)}, + { { C_STRING_WITH_LEN("JSON_EXISTS") }, BUILDER(Create_func_json_exists)}, + { { C_STRING_WITH_LEN("JSON_EXTRACT") }, BUILDER(Create_func_json_extract)}, + { { C_STRING_WITH_LEN("JSON_LENGTH") }, BUILDER(Create_func_json_length)}, + { { C_STRING_WITH_LEN("JSON_MERGE") }, BUILDER(Create_func_json_merge)}, + { { C_STRING_WITH_LEN("JSON_QUERY") }, BUILDER(Create_func_json_query)}, + { { C_STRING_WITH_LEN("JSON_QUOTE") }, BUILDER(Create_func_json_quote)}, + { { C_STRING_WITH_LEN("JSON_OBJECT") }, BUILDER(Create_func_json_object)}, + { { C_STRING_WITH_LEN("JSON_TYPE") }, BUILDER(Create_func_json_type)}, + { { 
C_STRING_WITH_LEN("JSON_VALID") }, BUILDER(Create_func_json_valid)}, + { { C_STRING_WITH_LEN("JSON_VALUE") }, BUILDER(Create_func_json_value)}, { { C_STRING_WITH_LEN("LAST_DAY") }, BUILDER(Create_func_last_day)}, { { C_STRING_WITH_LEN("LAST_INSERT_ID") }, BUILDER(Create_func_last_insert_id)}, { { C_STRING_WITH_LEN("LCASE") }, BUILDER(Create_func_lcase)}, diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc new file mode 100644 index 00000000000..80713710927 --- /dev/null +++ b/sql/item_jsonfunc.cc @@ -0,0 +1,1109 @@ +/* Copyright (c) 2016, Monty Program Ab. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + + +#include <my_global.h> +#include "sql_priv.h" +#include "sql_class.h" +#include "item.h" + + +/* + Compare ASCII string against the string with the specified + character set. + Only compares the equality, case insencitive. +*/ +static bool eq_ascii_string(const CHARSET_INFO *cs, + const char *ascii, + const char *s, uint32 s_len) +{ + const char *s_end= s + s_len; + + while (*ascii && s < s_end) + { + my_wc_t wc; + int wc_len; + + wc_len= cs->cset->mb_wc(cs, &wc, (uchar *) s, (uchar *) s_end); + if (wc_len <= 0 || (wc | 0x20) != (my_wc_t) *ascii) + return 0; + + ascii++; + s+= wc_len; + } + + return *ascii == 0 && s >= s_end; +} + + +/* + Appends arbitrary String to the JSON string taking charsets in + consideration. 
+*/ +static int st_append_escaped(String *s, const String *a) +{ + /* + In the worst case one character from the 'a' string + turns into '\uXXXX\uXXXX' which is 12. + */ + int str_len= a->length() * 12 * s->charset()->mbmaxlen / + a->charset()->mbminlen; + if (!s->reserve(str_len, 1024) && + (str_len= + json_escape(a->charset(), (uchar *) a->ptr(), (uchar *)a->end(), + s->charset(), + (uchar *) s->end(), (uchar *)s->end() + str_len)) > 0) + { + s->length(s->length() + str_len); + return 0; + } + + return a->length(); +} + + +longlong Item_func_json_valid::val_int() +{ + String *js= args[0]->val_str(&tmp_value); + json_engine_t je; + + if ((null_value= args[0]->null_value) || js == NULL) + return 0; + + json_scan_start(&je, js->charset(), (const uchar *) js->ptr(), + (const uchar *) js->ptr()+js->length()); + + while (json_scan_next(&je) == 0) {} + + return je.s.error == 0; +} + + +void Item_func_json_exists::fix_length_and_dec() +{ + Item_int_func::fix_length_and_dec(); + maybe_null= 1; + path.set_constant_flag(args[1]->const_item()); +} + + +longlong Item_func_json_exists::val_int() +{ + json_engine_t je; + uint array_counters[JSON_DEPTH_LIMIT]; + + String *js= args[0]->val_str(&tmp_js); + + if (!path.parsed) + { + String *s_p= args[1]->val_str(&tmp_path); + if (s_p && + json_path_setup(&path.p, s_p->charset(), (const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + goto err_return; + path.parsed= path.constant; + } + + if ((null_value= args[0]->null_value || args[1]->null_value)) + { + null_value= 1; + return 0; + } + + null_value= 0; + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + path.cur_step= path.p.steps; + if (json_find_path(&je, &path.p, &path.cur_step, array_counters)) + { + if (je.s.error) + goto err_return; + return 0; + } + + return 1; + +err_return: + null_value= 1; + return 0; +} + + +void Item_func_json_value::fix_length_and_dec() +{ + 
collation.set(args[0]->collation); + max_length= args[0]->max_length; + path.set_constant_flag(args[1]->const_item()); +} + + +/* + Returns NULL, not an error if the found value + is not a scalar. +*/ +String *Item_func_json_value::val_str(String *str) +{ + json_engine_t je; + String *js= args[0]->val_str(&tmp_js); + int error= 0; + uint array_counters[JSON_DEPTH_LIMIT]; + + if (!path.parsed) + { + String *s_p= args[1]->val_str(&tmp_path); + if (s_p && + json_path_setup(&path.p, s_p->charset(), (const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + goto err_return; + path.parsed= path.constant; + } + + if ((null_value= args[0]->null_value || args[1]->null_value)) + return NULL; + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + path.cur_step= path.p.steps; +continue_search: + if (json_find_path(&je, &path.p, &path.cur_step, array_counters)) + { + if (je.s.error) + goto err_return; + + null_value= 1; + return 0; + } + + if (json_read_value(&je)) + goto err_return; + + if (check_and_get_value(&je, str, &error)) + { + if (error) + goto err_return; + goto continue_search; + } + + return str; + +err_return: + null_value= 1; + return 0; +} + + +bool Item_func_json_value::check_and_get_value(json_engine_t *je, String *res, + int *error) +{ + if (!json_value_scalar(je)) + { + /* We only look for scalar values! */ + if (json_skip_level(je) || json_scan_next(je)) + *error= 1; + return true; + } + + res->set((const char *) je->value, je->value_len, je->s.cs); + return false; +} + + +bool Item_func_json_query::check_and_get_value(json_engine_t *je, String *res, + int *error) +{ + const uchar *value; + if (json_value_scalar(je)) + { + /* We skip scalar values. 
*/ + if (json_scan_next(je)) + *error= 1; + return true; + } + + value= je->value; + if (json_skip_level(je)) + { + *error= 1; + return true; + } + + res->set((const char *) je->value, je->s.c_str - value, je->s.cs); + return false; +} + + +void Item_func_json_quote::fix_length_and_dec() +{ + collation.set(args[0]->collation); + /* + Odd but realistic worst case is when all characters + of the argument turn into '\uXXXX\uXXXX', which is 12. + */ + max_length= args[0]->max_length * 12; +} + + +String *Item_func_json_quote::val_str(String *str) +{ + String *s= args[0]->val_str(&tmp_s); + + if ((null_value= args[0]->null_value)) + return NULL; + + str->length(0); + str->set_charset(s->charset()); + + if (st_append_escaped(str, s)) + { + /* Report an error. */ + null_value= 1; + return 0; + } + + return str; +} + + +static int alloc_tmp_paths(THD *thd, uint n_paths, + json_path_with_flags **paths,String **tmp_paths) +{ + if (n_paths > 0) + { + *paths= (json_path_with_flags *) alloc_root(thd->mem_root, + sizeof(json_path_with_flags) * n_paths); + *tmp_paths= (String *) alloc_root(thd->mem_root, sizeof(String) * n_paths); + if (*paths == 0 || *tmp_paths == 0) + return 1; + + bzero(*tmp_paths, sizeof(String) * n_paths); + + return 0; + } + + /* n_paths == 0 */ + *paths= 0; + *tmp_paths= 0; + return 0; +} + + +static void mark_constant_paths(json_path_with_flags *p, + Item** args, uint n_args) +{ + uint n; + for (n= 0; n < n_args; n++) + p[n].set_constant_flag(args[n]->const_item()); +} + + +bool Item_json_str_multipath::fix_fields(THD *thd, Item **ref) +{ + return alloc_tmp_paths(thd, get_n_paths(), &paths, &tmp_paths) || + Item_str_func::fix_fields(thd, ref); +} + + +void Item_json_str_multipath::cleanup() +{ + if (tmp_paths) + { + for (uint i= get_n_paths(); i>0; i--) + tmp_paths[i-1].free(); + tmp_paths= 0; + } + Item_str_func::cleanup(); +} + + +void Item_func_json_extract::fix_length_and_dec() +{ + collation.set(args[0]->collation); + max_length= args[0]->max_length 
* (arg_count - 1); + + mark_constant_paths(paths, args+1, arg_count-1); +} + + +String *Item_func_json_extract::val_str(String *str) +{ + String *js= args[0]->val_str(&tmp_js); + json_engine_t je; + bool multiple_values_found= FALSE; + const uchar *value; + const char *first_value= NULL, *first_p_value; + uint n_arg, v_len, first_len, first_p_len; + uint array_counters[JSON_DEPTH_LIMIT]; + + if ((null_value= args[0]->null_value)) + return 0; + + str->set_charset(js->charset()); + str->length(0); + + for (n_arg=1; n_arg < arg_count; n_arg++) + { + json_path_with_flags *c_path= paths + n_arg - 1; + if (!c_path->parsed) + { + String *s_p= args[n_arg]->val_str(tmp_paths + (n_arg-1)); + if (s_p && + json_path_setup(&c_path->p,s_p->charset(),(const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + goto error; + c_path->parsed= c_path->constant; + } + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + c_path->cur_step= c_path->p.steps; + + if (json_find_path(&je, &c_path->p, &c_path->cur_step, array_counters)) + { + /* Path wasn't found. */ + if (je.s.error) + goto error; + + continue; + } + + if (json_read_value(&je)) + goto error; + + value= je.value_begin; + if (json_value_scalar(&je)) + v_len= je.value_end - value; + else + { + if (json_skip_level(&je)) + goto error; + v_len= je.s.c_str - value; + } + + if (!multiple_values_found) + { + if (first_value == NULL) + { + /* + Just remember the first value as we don't know yet + if we need to create an array out of it or not. + */ + first_value= (const char *) value; + first_len= v_len; + /* + We need this as we have to preserve quotes around string + constants if we use the value to create an array. Otherwise + we get the value without the quotes. + */ + first_p_value= (const char *) je.value; + first_p_len= je.value_len; + continue; + } + else + { + multiple_values_found= TRUE; /* We have to make an JSON array. 
*/ + if (str->append("[", 1) || + str->append(first_value, first_len)) + goto error; /* Out of memory. */ + } + + } + if (str->append(", ", 2) || + str->append((const char *) value, v_len)) + goto error; /* Out of memory. */ + } + + if (first_value == NULL) + { + /* Nothing was found. */ + null_value= 1; + return 0; + } + + if (multiple_values_found ? + str->append("]") : + str->append(first_p_value, first_p_len)) + goto error; /* Out of memory. */ + + return str; + +error: + /* TODO: launch error messages. */ + null_value= 1; + return 0; +} + + +longlong Item_func_json_extract::val_int() +{ + String *js= args[0]->val_str(&tmp_js); + json_engine_t je; + uint n_arg; + uint array_counters[JSON_DEPTH_LIMIT]; + + if ((null_value= args[0]->null_value)) + return 0; + + for (n_arg=1; n_arg < arg_count; n_arg++) + { + json_path_with_flags *c_path= paths + n_arg - 1; + if (!c_path->parsed) + { + String *s_p= args[n_arg]->val_str(tmp_paths+(n_arg-1)); + if (s_p && + json_path_setup(&c_path->p,s_p->charset(),(const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + goto error; + c_path->parsed= c_path->constant; + } + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + c_path->cur_step= c_path->p.steps; + + if (json_find_path(&je, &c_path->p, &c_path->cur_step, array_counters)) + { + /* Path wasn't found. */ + if (je.s.error) + goto error; + + continue; + } + + if (json_read_value(&je)) + goto error; + + if (json_value_scalar(&je)) + { + int err; + char *v_end= (char *) je.value_end; + return (je.s.cs->cset->strtoll10)(je.s.cs, (const char *) je.value_begin, + &v_end, &err); + } + else + break; + } + + /* Nothing was found. */ + null_value= 1; + return 0; + +error: + /* TODO: launch error messages. 
*/ + null_value= 1; + return 0; +} + + +bool Item_func_json_contains::fix_fields(THD *thd, Item **ref) +{ + return alloc_tmp_paths(thd, arg_count-2, &paths, &tmp_paths) || + Item_int_func::fix_fields(thd, ref); +} + + +void Item_func_json_contains::fix_length_and_dec() +{ + a2_constant= args[1]->const_item(); + a2_parsed= FALSE; + mark_constant_paths(paths, args+2, arg_count-2); + Item_int_func::fix_length_and_dec(); +} + + +void Item_func_json_contains::cleanup() +{ + if (tmp_paths) + { + for (uint i= arg_count-2; i>0; i--) + tmp_paths[i-1].free(); + tmp_paths= 0; + } + Item_int_func::cleanup(); +} + + +longlong Item_func_json_contains::val_int() +{ + String *js= args[0]->val_str(&tmp_js); + json_engine_t je; + uint n_arg; + + if ((null_value= args[0]->null_value)) + return 0; + + if (!a2_parsed) + { + val= args[1]->val_str(&tmp_val); + a2_parsed= a2_constant; + } + + if (val == 0) + { + null_value= 1; + return 0; + } + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + if (arg_count<3) /* No path specified. */ + { + if (json_read_value(&je)) + goto error; + String jv_str((const char *)je.value_begin, + je.value_end - je.value_begin, js->charset()); + return val->eq(&jv_str, js->charset()); + } + + for (n_arg=2; n_arg < arg_count; n_arg++) + { + uint array_counters[JSON_DEPTH_LIMIT]; + json_path_with_flags *c_path= paths + n_arg - 2; + if (!c_path->parsed) + { + String *s_p= args[n_arg]->val_str(tmp_paths+(n_arg-2)); + if (s_p && + json_path_setup(&c_path->p,s_p->charset(),(const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + goto error; + c_path->parsed= c_path->constant; + } + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + c_path->cur_step= c_path->p.steps; + if (json_find_path(&je, &c_path->p, &c_path->cur_step, array_counters)) + { + /* Path wasn't found. 
*/ + if (je.s.error) + goto error; + continue; + } + + if (json_read_value(&je)) + goto error; + String jv_str((const char *)je.value_begin, + je.value_end - je.value_begin, js->charset()); + if (val->eq(&jv_str, js->charset())) + return 1; + } + + + return 0; + +error: + null_value= 1; + return 0; +} + + +bool Item_func_json_contains_path::fix_fields(THD *thd, Item **ref) +{ + return alloc_tmp_paths(thd, arg_count-2, &paths, &tmp_paths) || + Item_int_func::fix_fields(thd, ref); +} + + +void Item_func_json_contains_path::fix_length_and_dec() +{ + ooa_constant= args[1]->const_item(); + ooa_parsed= FALSE; + mark_constant_paths(paths, args+2, arg_count-2); + Item_int_func::fix_length_and_dec(); +} + + +void Item_func_json_contains_path::cleanup() +{ + if (tmp_paths) + { + for (uint i= arg_count-2; i>0; i--) + tmp_paths[i-1].free(); + tmp_paths= 0; + } + Item_int_func::cleanup(); +} + + +longlong Item_func_json_contains_path::val_int() +{ + String *js= args[0]->val_str(&tmp_js); + json_engine_t je; + uint n_arg; + longlong result; + + if ((null_value= args[0]->null_value)) + return 0; + + if (!ooa_parsed) + { + char buff[20]; + String *res, tmp(buff, sizeof(buff), &my_charset_bin); + res= args[1]->val_str(&tmp); + mode_one=eq_ascii_string(res->charset(), "one", + res->ptr(), res->length()); + if (!mode_one) + { + if (!eq_ascii_string(res->charset(), "all", res->ptr(), res->length())) + goto error; + } + ooa_parsed= ooa_constant; + } + + result= !mode_one; + for (n_arg=2; n_arg < arg_count; n_arg++) + { + uint array_counters[JSON_DEPTH_LIMIT]; + json_path_with_flags *c_path= paths + n_arg - 2; + if (!c_path->parsed) + { + String *s_p= args[n_arg]->val_str(tmp_paths+(n_arg-2)); + if (s_p && + json_path_setup(&c_path->p,s_p->charset(),(const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + goto error; + c_path->parsed= c_path->constant; + } + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + 
+ c_path->cur_step= c_path->p.steps; + if (json_find_path(&je, &c_path->p, &c_path->cur_step, array_counters)) + { + /* Path wasn't found. */ + if (je.s.error) + goto error; + + if (!mode_one) + { + result= 0; + break; + } + } + else if (mode_one) + { + result= 1; + break; + } + } + + + return result; + +error: + null_value= 1; + return 0; +} + + +static int append_json_value(String *str, Item *item, String *tmp_val) +{ + if (item->is_bool_type()) + { + longlong v_int= item->val_int(); + const char *t_f; + int t_f_len; + + if (item->null_value) + goto append_null; + + if (v_int) + { + t_f= "true"; + t_f_len= 4; + } + else + { + t_f= "false"; + t_f_len= 5; + } + + return str->append(t_f, t_f_len); + } + { + String *sv= item->val_str(tmp_val); + if (item->null_value) + goto append_null; + if (item->result_type() == STRING_RESULT) + { + return str->append("\"", 1) || + st_append_escaped(str, sv) || + str->append("\"", 1); + } + return st_append_escaped(str, sv); + } + +append_null: + return str->append("null", 4); +} + + +static int append_json_keyname(String *str, Item *item, String *tmp_val) +{ + String *sv= item->val_str(tmp_val); + if (item->null_value) + goto append_null; + + return str->append("\"", 1) || + st_append_escaped(str, sv) || + str->append("\": ", 3); + +append_null: + return str->append("\"\": ", 4); +} + + +void Item_func_json_array::fix_length_and_dec() +{ + ulonglong char_length= 4; + uint n_arg; + + if (agg_arg_charsets_for_string_result(collation, args, arg_count)) + return; + + for (n_arg=0 ; n_arg < arg_count ; n_arg++) + char_length+= args[n_arg]->max_char_length() + 2; + + fix_char_length_ulonglong(char_length); + tmp_val.set_charset(collation.collation); +} + + +String *Item_func_json_array::val_str(String *str) +{ + DBUG_ASSERT(fixed == 1); + uint n_arg; + + str->length(0); + + if (str->append("[", 1) || + ((arg_count > 0) && append_json_value(str, args[0],&tmp_val))) + goto err_return; + + for (n_arg=1; n_arg < arg_count; n_arg++) + { + 
if (str->append(", ", 2) || + append_json_value(str, args[n_arg], &tmp_val)) + goto err_return; + } + + if (str->append("]", 1)) + goto err_return; + + return str; + +err_return: + /*TODO: Launch out of memory error. */ + null_value= 1; + return NULL; +} + + +void Item_func_json_array_append::fix_length_and_dec() +{ + uint n_arg; + ulonglong char_length; + + collation.set(args[0]->collation); + char_length= args[0]->max_char_length(); + + for (n_arg= 1; n_arg < arg_count; n_arg+= 2) + { + paths[n_arg-1].set_constant_flag(args[n_arg]->const_item()); + char_length+= args[n_arg+1]->max_char_length() + 4; + } + + fix_char_length_ulonglong(char_length); +} + + +String *Item_func_json_array_append::val_str(String *str) +{ + json_engine_t je; + String *js= args[0]->val_str(&tmp_js); + uint n_arg, n_path, str_rest_len; + const uchar *ar_end; + + DBUG_ASSERT(fixed == 1); + + if ((null_value= args[0]->null_value)) + return 0; + + for (n_arg=1, n_path=0; n_arg < arg_count; n_arg+=2, n_path++) + { + uint array_counters[JSON_DEPTH_LIMIT]; + json_path_with_flags *c_path= paths + n_path; + if (!c_path->parsed) + { + String *s_p= args[n_arg]->val_str(tmp_paths+n_path); + if (s_p && + json_path_setup(&c_path->p,s_p->charset(),(const uchar *) s_p->ptr(), + (const uchar *) s_p->ptr() + s_p->length())) + goto error; + c_path->parsed= c_path->constant; + } + if (args[n_arg]->null_value) + { + null_value= 1; + return 0; + } + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + c_path->cur_step= c_path->p.steps; + + if (json_find_path(&je, &c_path->p, &c_path->cur_step, array_counters)) + { + if (je.s.error) + goto error; + null_value= 1; + return 0; + } + + if (json_read_value(&je)) + goto error; + + if (je.value_type != JSON_VALUE_ARRAY) + { + /* Must be an array. 
*/ + goto error; + } + + if (json_skip_level(&je)) + goto error; + + str->length(0); + str->set_charset(js->charset()); + if (str->reserve(js->length() + 8, 1024)) + goto error; /* Out of memory. */ + ar_end= je.s.c_str - je.sav_c_len; + str_rest_len= js->length() - (ar_end - (const uchar *) js->ptr()); + str->q_append(js->ptr(), ar_end-(const uchar *) js->ptr()); + str->append(", ", 2); + if (append_json_value(str, args[n_arg+1], &tmp_val)) + goto error; /* Out of memory. */ + + if (str->reserve(str_rest_len, 1024)) + goto error; /* Out of memory. */ + str->q_append((const char *) ar_end, str_rest_len); + } + + return str; + +error: + null_value= 1; + return 0; +} + + +String *Item_func_json_object::val_str(String *str) +{ + DBUG_ASSERT(fixed == 1); + uint n_arg; + + str->length(0); + + if (str->append("{", 1) || + (arg_count > 0 && + (append_json_keyname(str, args[0], &tmp_val) || + append_json_value(str, args[1], &tmp_val)))) + goto err_return; + + for (n_arg=2; n_arg < arg_count; n_arg+=2) + { + if (str->append(", ", 2) || + append_json_keyname(str, args[n_arg], &tmp_val) || + append_json_value(str, args[n_arg+1], &tmp_val)) + goto err_return; + } + + if (str->append("}", 1)) + goto err_return; + + return str; + +err_return: + /*TODO: Launch out of memory error. */ + null_value= 1; + return NULL; +} + + +String *Item_func_json_merge::val_str(String *str) +{ + DBUG_ASSERT(fixed == 1); + uint n_arg; + + str->length(0); + + if (str->append("[", 1) || + ((arg_count > 0) && append_json_value(str, args[0], &tmp_val))) + goto err_return; + + for (n_arg=1; n_arg < arg_count; n_arg++) + { + if (str->append(", ", 2) || + append_json_value(str, args[n_arg], &tmp_val)) + goto err_return; + } + + if (str->append("]", 1)) + goto err_return; + + return str; + +err_return: + /*TODO: Launch out of memory error. 
*/ + null_value= 1; + return NULL; +} + + +longlong Item_func_json_length::val_int() +{ + String *js= args[0]->val_str(&tmp_js); + json_engine_t je; + uint length= 0; + + if ((null_value= args[0]->null_value)) + return 0; + + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + do + { + if (je.state == JST_VALUE) + length++; + } while (json_scan_next(&je) == 0); + + if (je.s.error) + { + null_value= 1; + return 0; + } + + return length; + +} + + +longlong Item_func_json_depth::val_int() +{ + String *js= args[0]->val_str(&tmp_js); + json_engine_t je; + uint depth= 0; + bool inc_depth= TRUE; + + if ((null_value= args[0]->null_value)) + return 0; + + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + do + { + switch (je.state) + { + case JST_VALUE: + if (inc_depth) + { + depth++; + inc_depth= FALSE; + } + break; + case JST_OBJ_START: + case JST_ARRAY_START: + inc_depth= TRUE; + break; + default: + break; + } + } while (json_scan_next(&je) == 0); + + if (je.s.error) + { + null_value= 1; + return 0; + } + + return depth; +} + + +void Item_func_json_type::fix_length_and_dec() +{ + collation.set(&my_charset_utf8_general_ci); + max_length= 12; +} + + +String *Item_func_json_type::val_str(String *str) +{ + String *js= args[0]->val_str(&tmp_js); + json_engine_t je; + const char *type; + + if ((null_value= args[0]->null_value)) + return 0; + + + json_scan_start(&je, js->charset(),(const uchar *) js->ptr(), + (const uchar *) js->ptr() + js->length()); + + if (json_read_value(&je)) + goto error; + + switch (je.value_type) + { + case JSON_VALUE_OBJECT: + type= "OBJECT"; + break; + case JSON_VALUE_ARRAY: + type= "ARRAY"; + break; + case JSON_VALUE_STRING: + type= "STRING"; + break; + case JSON_VALUE_NUMBER: + type= "NUMBER"; + break; + case JSON_VALUE_TRUE: + case JSON_VALUE_FALSE: + type= "BOOLEAN"; + break; + default: + type= "NULL"; + break; + } + + 
str->set(type, strlen(type), &my_charset_utf8_general_ci); + return str; + +error: + null_value= 1; + return 0; +} + + diff --git a/sql/item_jsonfunc.h b/sql/item_jsonfunc.h new file mode 100644 index 00000000000..54da67b5ab9 --- /dev/null +++ b/sql/item_jsonfunc.h @@ -0,0 +1,304 @@ +#ifndef ITEM_JSONFUNC_INCLUDED +#define ITEM_JSONFUNC_INCLUDED + +/* Copyright (c) 2016, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + + +/* This file defines all JSON functions */ + + +#include <json_lib.h> +#include "item_cmpfunc.h" // Item_bool_func +#include "item_strfunc.h" // Item_str_func + + +class json_path_with_flags +{ +public: + json_path_t p; + bool constant; + bool parsed; + json_path_step_t *cur_step; + void set_constant_flag(bool s_constant) + { + constant= s_constant; + parsed= FALSE; + } +}; + + +class Item_func_json_valid: public Item_int_func +{ +protected: + String tmp_value; + +public: + Item_func_json_valid(THD *thd, Item *json) : Item_int_func(thd, json) {} + longlong val_int(); + const char *func_name() const { return "json_valid"; } + void fix_length_and_dec() + { + Item_int_func::fix_length_and_dec(); + maybe_null= 1; + } + bool is_bool_type() { return true; } + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_valid>(thd, mem_root, this); } +}; + + +class Item_func_json_exists: public Item_int_func +{ +protected: + 
json_path_with_flags path; + String tmp_js, tmp_path; + +public: + Item_func_json_exists(THD *thd, Item *js, Item *path): + Item_int_func(thd, js, path) {} + const char *func_name() const { return "json_exists"; } + bool is_bool_type() { return true; } + void fix_length_and_dec(); + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_exists>(thd, mem_root, this); } + longlong val_int(); +}; + + +class Item_func_json_value: public Item_str_func +{ +protected: + json_path_with_flags path; + String tmp_js, tmp_path; + +public: + Item_func_json_value(THD *thd, Item *js, Item *path): + Item_str_func(thd, js, path) {} + const char *func_name() const { return "json_value"; } + void fix_length_and_dec(); + String *val_str(String *); + virtual bool check_and_get_value(json_engine_t *je, String *res, int *error); + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_value>(thd, mem_root, this); } +}; + + +class Item_func_json_query: public Item_func_json_value +{ +public: + Item_func_json_query(THD *thd, Item *js, Item *path): + Item_func_json_value(thd, js, path) {} + const char *func_name() const { return "json_query"; } + bool check_and_get_value(json_engine_t *je, String *res, int *error); + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_query>(thd, mem_root, this); } +}; + + +class Item_func_json_quote: public Item_str_func +{ +protected: + String tmp_s; + +public: + Item_func_json_quote(THD *thd, Item *s): Item_str_func(thd, s) {} + const char *func_name() const { return "json_quote"; } + void fix_length_and_dec(); + String *val_str(String *); + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_quote>(thd, mem_root, this); } +}; + + +class Item_json_str_multipath: public Item_str_func +{ +protected: + json_path_with_flags *paths; + String *tmp_paths; +public: + Item_json_str_multipath(THD *thd, List<Item> &list): + 
Item_str_func(thd, list), tmp_paths(0) {} + bool fix_fields(THD *thd, Item **ref); + void cleanup(); + virtual uint get_n_paths() const = 0; +}; + + +class Item_func_json_extract: public Item_json_str_multipath +{ +protected: + String tmp_js; +public: + Item_func_json_extract(THD *thd, List<Item> &list): + Item_json_str_multipath(thd, list) {} + const char *func_name() const { return "json_extract"; } + void fix_length_and_dec(); + String *val_str(String *); + longlong val_int(); + uint get_n_paths() const { return arg_count - 1; } + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_extract>(thd, mem_root, this); } +}; + + +class Item_func_json_contains: public Item_int_func +{ +protected: + String tmp_js; + json_path_with_flags *paths; + String *tmp_paths; + bool a2_constant, a2_parsed; + String tmp_val, *val; +public: + Item_func_json_contains(THD *thd, List<Item> &list): + Item_int_func(thd, list), tmp_paths(0) {} + const char *func_name() const { return "json_contains"; } + bool fix_fields(THD *thd, Item **ref); + void fix_length_and_dec(); + void cleanup(); + longlong val_int(); + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_contains>(thd, mem_root, this); } +}; + + +class Item_func_json_contains_path: public Item_int_func +{ +protected: + String tmp_js; + json_path_with_flags *paths; + String *tmp_paths; + bool mode_one; + bool ooa_constant, ooa_parsed; + +public: + Item_func_json_contains_path(THD *thd, List<Item> &list): + Item_int_func(thd, list), tmp_paths(0) {} + const char *func_name() const { return "json_contains_path"; } + bool fix_fields(THD *thd, Item **ref); + void fix_length_and_dec(); + void cleanup(); + longlong val_int(); + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_contains_path>(thd, mem_root, this); } +}; + + +class Item_func_json_array: public Item_str_func +{ +protected: + String tmp_val; +public: + Item_func_json_array(THD 
*thd): + Item_str_func(thd) {} + Item_func_json_array(THD *thd, List<Item> &list): + Item_str_func(thd, list) {} + String *val_str(String *); + void fix_length_and_dec(); + const char *func_name() const { return "json_array"; } + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_array>(thd, mem_root, this); } +}; + + +class Item_func_json_array_append: public Item_json_str_multipath +{ +protected: + String tmp_js; + String tmp_val; +public: + Item_func_json_array_append(THD *thd, List<Item> &list): + Item_json_str_multipath(thd, list) {} + void fix_length_and_dec(); + String *val_str(String *); + uint get_n_paths() const { return arg_count/2; } + const char *func_name() const { return "json_array_append"; } + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_array_append>(thd, mem_root, this); } +}; + + +class Item_func_json_object: public Item_func_json_array +{ +public: + Item_func_json_object(THD *thd): + Item_func_json_array(thd) {} + Item_func_json_object(THD *thd, List<Item> &list): + Item_func_json_array(thd, list) {} + String *val_str(String *); + const char *func_name() const { return "json_object"; } + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_object>(thd, mem_root, this); } +}; + + +class Item_func_json_merge: public Item_func_json_array +{ +protected: + String tmp_val; +public: + Item_func_json_merge(THD *thd, List<Item> &list): + Item_func_json_array(thd, list) {} + String *val_str(String *); + const char *func_name() const { return "json_merge"; } + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_merge>(thd, mem_root, this); } +}; + + +class Item_func_json_length: public Item_int_func +{ +protected: + String tmp_js; + String tmp_path; +public: + Item_func_json_length(THD *thd, List<Item> &list): + Item_int_func(thd, list) {} + const char *func_name() const { return "json_length"; } + longlong val_int(); 
+ Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_length>(thd, mem_root, this); } +}; + + +class Item_func_json_depth: public Item_int_func +{ +protected: + String tmp_js; +public: + Item_func_json_depth(THD *thd, Item *js): Item_int_func(thd, js) {} + const char *func_name() const { return "json_depth"; } + longlong val_int(); + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_depth>(thd, mem_root, this); } +}; + + +class Item_func_json_type: public Item_str_func +{ +protected: + String tmp_js; +public: + Item_func_json_type(THD *thd, Item *js): Item_str_func(thd, js) {} + const char *func_name() const { return "json_type"; } + void fix_length_and_dec(); + String *val_str(String *); + Item *get_copy(THD *thd, MEM_ROOT *mem_root) + { return get_item_copy<Item_func_json_type>(thd, mem_root, this); } +}; + + +#endif /* ITEM_JSONFUNC_INCLUDED */ diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc index cbbdeea0205..4d848a0f737 100644 --- a/sql/item_xmlfunc.cc +++ b/sql/item_xmlfunc.cc @@ -13,10 +13,6 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifdef __GNUC__ -#pragma implementation -#endif - #include <my_global.h> #include "sql_priv.h" /* @@ -407,19 +403,6 @@ public: /* - We need to distinguish a number from a boolean: - a[1] and a[true] are different things in XPath. -*/ -class Item_bool :public Item_int -{ -public: - Item_bool(THD *thd, int32 i): Item_int(thd, i) {} - const char *func_name() const { return "xpath_bool"; } - bool is_bool_type() { return true; } -}; - - -/* Converts its argument into a boolean value. 
* a number is true if it is non-zero * a node-set is true if and only if it is non-empty @@ -1214,13 +1197,13 @@ my_xpath_keyword(MY_XPATH *x, static Item *create_func_true(MY_XPATH *xpath, Item **args, uint nargs) { - return new (xpath->thd->mem_root) Item_bool(xpath->thd, 1); + return new (xpath->thd->mem_root) Item_bool(xpath->thd, "xpath_bool", 1); } static Item *create_func_false(MY_XPATH *xpath, Item **args, uint nargs) { - return new (xpath->thd->mem_root) Item_bool(xpath->thd, 0); + return new (xpath->thd->mem_root) Item_bool(xpath->thd, "xpath_bool", 0); } diff --git a/sql/item_xmlfunc.h b/sql/item_xmlfunc.h index 3c58955c96a..3c071b897e2 100644 --- a/sql/item_xmlfunc.h +++ b/sql/item_xmlfunc.h @@ -21,11 +21,6 @@ /* This file defines all XML functions */ -#ifdef USE_PRAGMA_INTERFACE -#pragma interface /* gcc class implementation */ -#endif - - typedef struct my_xml_node_st MY_XML_NODE; diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 6a03f58c3b1..e17a514a391 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -13869,13 +13869,13 @@ literal: } | FALSE_SYM { - $$= new (thd->mem_root) Item_int(thd, (char*) "FALSE",0,1); + $$= new (thd->mem_root) Item_bool(thd, (char*) "FALSE",0); if ($$ == NULL) MYSQL_YYABORT; } | TRUE_SYM { - $$= new (thd->mem_root) Item_int(thd, (char*) "TRUE",1,1); + $$= new (thd->mem_root) Item_bool(thd, (char*) "TRUE",1); if ($$ == NULL) MYSQL_YYABORT; } diff --git a/storage/connect/mysql-test/connect/disabled.def b/storage/connect/mysql-test/connect/disabled.def index 4e07b5c0576..a97ba67d73e 100644 --- a/storage/connect/mysql-test/connect/disabled.def +++ b/storage/connect/mysql-test/connect/disabled.def @@ -13,3 +13,5 @@ jdbc : Variable settings depend on machine configuration jdbc_new : Variable settings depend on machine configuration jdbc_oracle : Variable settings depend on machine configuration jdbc_postgresql : Variable settings depend on machine configuration +json_udf : conflicts with the server JSON functions 
+json_udf_bin : conflicts with the server JSON functions diff --git a/strings/CMakeLists.txt b/strings/CMakeLists.txt index 1e364bc951b..96de24b4770 100644 --- a/strings/CMakeLists.txt +++ b/strings/CMakeLists.txt @@ -23,7 +23,7 @@ SET(STRINGS_SOURCES bchange.c bmove_upp.c ctype-big5.c ctype-bin.c ctype-cp932.c str2int.c str_alloc.c strcend.c strend.c strfill.c strmake.c strmov.c strnmov.c strxmov.c strxnmov.c xml.c strmov_overlapp.c - my_strchr.c strcont.c strappend.c) + my_strchr.c strcont.c strappend.c json_lib.c) IF(NOT HAVE_STRNLEN) # OSX below 10.7 did not have strnlen diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index e154545e4f6..1c9dfe7324b 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1190,10 +1190,13 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)), #endif /* HAVE_CHARSET_mb2*/ +/* + Next part is actually HAVE_CHARSET_utf16-specific, + but the JSON functions needed my_utf16_uni() + so the #ifdef was moved lower. +*/ -#ifdef HAVE_CHARSET_utf16 - /* D800..DB7F - Non-provate surrogate high (896 pages) DB80..DBFF - Private surrogate high (128 pages) @@ -1260,7 +1263,12 @@ static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1) #undef IS_MB2_CHAR #undef IS_MB4_CHAR -static int +/* + These two functions are used in JSON library, so made exportable + and unconditionally compiled into the library. 
+*/ + +/*static*/ int my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)), my_wc_t *pwc, const uchar *s, const uchar *e) { @@ -1293,7 +1301,7 @@ my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)), } -static int +/*static*/ int my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)), my_wc_t wc, uchar *s, uchar *e) { @@ -1323,6 +1331,9 @@ my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)), } +#ifdef HAVE_CHARSET_utf16 + + static inline void my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) { diff --git a/strings/json_lib.c b/strings/json_lib.c new file mode 100644 index 00000000000..3f55280e3fa --- /dev/null +++ b/strings/json_lib.c @@ -0,0 +1,1553 @@ +#include <my_global.h> +#include <m_ctype.h> + + +#include "json_lib.h" + +/* + JSON escaping lets user specify UTF16 codes of characters. + So we're going to need the UTF16 charset capabilities. Let's import + them from the utf16 charset. +*/ +int my_utf16_uni(CHARSET_INFO *cs, + my_wc_t *pwc, const uchar *s, const uchar *e); +int my_uni_utf16(CHARSET_INFO *cs, my_wc_t wc, uchar *s, uchar *e); + + +void json_string_set_str(json_string_t *s, + const uchar *str, const uchar *end) +{ + s->c_str= str; + s->str_end= end; +} + + +void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs) +{ + s->cs= i_cs; + s->error= 0; + s->wc= i_cs->cset->mb_wc; +} + + +static void json_string_setup(json_string_t *s, + CHARSET_INFO *i_cs, const uchar *str, + const uchar *end) +{ + json_string_set_cs(s, i_cs); + json_string_set_str(s, str, end); +} + + +enum json_char_classes { + C_EOS, /* end of string */ + C_LCURB, /* { */ + C_RCURB, /* } */ + C_LSQRB, /* [ */ + C_RSQRB, /* ] */ + C_COLON, /* : */ + C_COMMA, /* , */ + C_QUOTE, /* " */ + C_DIGIT, /* -0123456789 */ + C_LOW_F, /* 'f' (for "false") */ + C_LOW_N, /* 'n' (for "null") */ + C_LOW_T, /* 't' (for "true") */ + C_ETC, /* everything else */ + C_ERR, /* character disallowed in JSON */ + C_BAD, /* invalid character, charset handler cannot read it */ + NR_C_CLASSES, /* 
Counter for classes that handled with functions. */ + C_SPACE /* space. Doesn't need specific handlers, so after the counter.*/ +}; + + +/* + This array maps first 128 Unicode Code Points into classes. + The remaining Unicode characters should be mapped to C_ETC. +*/ + +static enum json_char_classes json_chr_map[128] = { + C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, + C_ERR, C_SPACE, C_SPACE, C_ERR, C_ERR, C_SPACE, C_ERR, C_ERR, + C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, + C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, C_ERR, + + C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_COMMA, C_DIGIT, C_ETC, C_ETC, + C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, + C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + + C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_LSQRB, C_ETC, C_RSQRB, C_ETC, C_ETC, + + C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_F, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_N, C_ETC, + C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_T, C_ETC, C_ETC, C_ETC, + C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC +}; + + +/* + JSON parser actually has more states than the 'enum json_states' + declares. But the rest of the states aren't seen to the user so let's + specify them here to avoid confusion. +*/ + +enum json_all_states { + JST_DONE= NR_JSON_USER_STATES, /* ok to finish */ + JST_OBJ_CONT= NR_JSON_USER_STATES+1, /* object continues */ + JST_ARRAY_CONT= NR_JSON_USER_STATES+2, /* array continues */ + JST_READ_VALUE= NR_JSON_USER_STATES+3, /* value is being read */ + NR_JSON_STATES= NR_JSON_USER_STATES+4 +}; + + +typedef int (*json_state_handler)(json_engine_t *); + + +/* The string is broken. 
*/ +static int unexpected_eos(json_engine_t *j) +{ + j->s.error= JE_EOS; + return 1; +} + + +/* This symbol here breaks the JSON syntax. */ +static int syntax_error(json_engine_t *j) +{ + j->s.error= JE_SYN; + return 1; +} + + +/* Value of object. */ +static int mark_object(json_engine_t *j) +{ + j->state= JST_OBJ_START; + *(++j->stack_p)= JST_OBJ_CONT; + return 0; +} + + +/* Read value of object. */ +static int read_obj(json_engine_t *j) +{ + j->state= JST_OBJ_START; + j->value_type= JSON_VALUE_OBJECT; + j->value= j->value_begin; + *(++j->stack_p)= JST_OBJ_CONT; + return 0; +} + + +/* Value of array. */ +static int mark_array(json_engine_t *j) +{ + j->state= JST_ARRAY_START; + *(++j->stack_p)= JST_ARRAY_CONT; + j->value= j->value_begin; + return 0; +} + +/* Read value of object. */ +static int read_array(json_engine_t *j) +{ + j->state= JST_ARRAY_START; + j->value_type= JSON_VALUE_ARRAY; + j->value= j->value_begin; + *(++j->stack_p)= JST_ARRAY_CONT; + return 0; +} + + + +/* + Character classes inside the JSON string constant. + We mostly need this to parse escaping properly. + Escapings availabe in JSON are: + \" - quotation mark + \\ - backslash + \b - backspace UNICODE 8 + \f - formfeed UNICODE 12 + \n - newline UNICODE 10 + \r - carriage return UNICODE 13 + \t - horizontal tab UNICODE 9 + \u{four-hex-digits} - code in UCS16 character set +*/ +enum json_string_char_classes { + S_0= 0, + S_1= 1, + S_2= 2, + S_3= 3, + S_4= 4, + S_5= 5, + S_6= 6, + S_7= 7, + S_8= 8, + S_9= 9, + S_A= 10, + S_B= 11, + S_C= 12, + S_D= 13, + S_E= 14, + S_F= 15, + S_ETC= 36, /* rest of characters. */ + S_QUOTE= 37, + S_BKSL= 38, /* \ */ + S_ERR= 100, /* disallowed */ +}; + + +/* This maps characters to their types inside a string constant. 
*/ +static enum json_string_char_classes json_instr_chr_map[128] = { + S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, + S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, + S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, + S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, S_ERR, + + S_ETC, S_ETC, S_QUOTE, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, + S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, + S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7, + S_8, S_9, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, + + S_ETC, S_A, S_B, S_C, S_D, S_E, S_F, S_ETC, + S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, + S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, + S_ETC, S_ETC, S_ETC, S_ETC, S_BKSL, S_ETC, S_ETC, S_ETC, + + S_ETC, S_A, S_B, S_C, S_D, S_E, S_F, S_ETC, + S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, + S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, + S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC, S_ETC +}; + + +static int read_4_hexdigits(json_string_t *s, uchar *dest) +{ + int i, t, c_len; + for (i=0; i<4; i++) + { + if ((c_len= json_next_char(s)) <= 0) + return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR; + + if (s->c_next >= 128 || (t= json_instr_chr_map[s->c_next]) >= S_F) + return s->error= JE_SYN; + + s->c_str+= c_len; + dest[i/2]+= (i % 2) ? t : t*16; + } + return 0; +} + + +static int json_handle_esc(json_string_t *s) +{ + int t, c_len; + + if ((c_len= json_next_char(s)) <= 0) + return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR; + + s->c_str+= c_len; + switch (s->c_next) + { + case 'b': + s->c_next= 8; + return 0; + case 'f': + s->c_next= 12; + return 0; + case 'n': + s->c_next= 10; + return 0; + case 'r': + s->c_next= 13; + return 0; + case 't': + s->c_next= 9; + return 0; + } + + if (s->c_next < 128 && (t= json_instr_chr_map[s->c_next]) == S_ERR) + { + s->c_str-= c_len; + return s->error= JE_ESCAPING; + } + + + if (s->c_next != 'u') + return 0; + + { + /* + Read the four-hex-digits code. 
+ If symbol is not in the Basic Multilingual Plane, we're reading + the string for the next four digits to compose the UTF-16 surrogate pair. + */ + uchar code[4]= {0,0,0,0}; + + if (read_4_hexdigits(s, code)) + return 1; + + if ((c_len= my_utf16_uni(0, &s->c_next, code, code+2)) == 2) + return 0; + + if (c_len != MY_CS_TOOSMALL4) + return s->error= JE_BAD_CHR; + + if ((c_len= json_next_char(s)) <= 0) + return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR; + if (s->c_next != '\\') + return s->error= JE_SYN; + + if ((c_len= json_next_char(s)) <= 0) + return s->error= json_eos(s) ? JE_EOS : JE_BAD_CHR; + if (s->c_next != 'u') + return s->error= JE_SYN; + + if (read_4_hexdigits(s, code+2)) + return 1; + + if ((c_len= my_utf16_uni(0, &s->c_next, code, code+4)) == 2) + return 0; + } + return s->error= JE_BAD_CHR; +} + + +int json_read_string_const_chr(json_string_t *js) +{ + int c_len; + + if ((c_len= json_next_char(js)) > 0) + { + js->c_str+= c_len; + return (js->c_next == '\\') ? json_handle_esc(js) : 0; + } + js->error= json_eos(js) ? JE_EOS : JE_BAD_CHR; + return 1; +} + + +static int skip_str_constant(json_engine_t *j) +{ + int t, c_len; + for (;;) + { + if ((c_len= json_next_char(&j->s)) > 0) + { + j->s.c_str+= c_len; + if (j->s.c_next >= 128 || ((t=json_instr_chr_map[j->s.c_next]) <= S_ETC)) + continue; + + if (j->s.c_next == '"') + break; + if (j->s.c_next == '\\') + { + if (json_handle_esc(&j->s)) + return 1; + continue; + } + /* Symbol not allowed in JSON. */ + return j->s.error= JE_NOT_JSON_CHR; + } + else + return j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR; + } + + j->state= *j->stack_p; + return 0; +} + + +/* Scalar string. */ +static int v_string(json_engine_t *j) +{ + return skip_str_constant(j) || json_scan_next(j); +} + + +/* Read scalar string. 
*/ +static int read_strn(json_engine_t *j) +{ + j->value= j->s.c_str; + + if (skip_str_constant(j)) + return 1; + + j->state= *j->stack_p; + j->value_type= JSON_VALUE_STRING; + j->value_len= (j->s.c_str - j->value) - 1; + return 0; +} + + +/* + We have dedicated parser for numeric constants. It's similar + to the main JSON parser, we similarly define character classes, + map characters to classes and implement the state-per-class + table. Though we don't create functions that handle + particular classes, just specify what new state should parser + get in this case. +*/ +enum json_num_char_classes { + N_MINUS, + N_PLUS, + N_ZERO, + N_DIGIT, + N_POINT, + N_E, + N_END, + N_EEND, + N_ERR, + N_NUM_CLASSES +}; + + +static enum json_num_char_classes json_num_chr_map[128] = { + N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, + N_ERR, N_END, N_END, N_ERR, N_ERR, N_END, N_ERR, N_ERR, + N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, + N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, N_ERR, + + N_END, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, + N_EEND, N_EEND, N_EEND, N_PLUS, N_END, N_MINUS, N_POINT, N_EEND, + N_ZERO, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, N_DIGIT, + N_DIGIT, N_DIGIT, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, + + N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_E, N_EEND, N_EEND, + N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, + N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, + N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_END, N_EEND, N_EEND, + + N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_E, N_EEND, N_EEND, + N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, + N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, + N_EEND, N_EEND, N_EEND, N_EEND, N_EEND, N_END, N_EEND, N_EEND, +}; + + +enum json_num_states { + NS_OK, /* Number ended. */ + NS_GO, /* Initial state. */ + NS_GO1, /* If the number starts with '-'. */ + NS_Z, /* If the number starts with '0'. 
*/ + NS_Z1, /* If the numbers starts with '-0'. */ + NS_INT, /* Integer part. */ + NS_FRAC,/* Fractional part. */ + NS_EX, /* Exponential part begins. */ + NS_EX1, /* Exponential part continues. */ + NS_NUM_STATES +}; + + +static int json_num_states[NS_NUM_STATES][N_NUM_CLASSES]= +{ +/* - + 0 1..9 POINT E END_OK ERROR */ +/*OK*/ { JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR }, +/*GO*/ { NS_GO1, JE_SYN, NS_Z, NS_INT, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR }, +/*GO1*/ { JE_SYN, JE_SYN, NS_Z1, NS_INT, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR }, +/*ZERO*/ { JE_SYN, JE_SYN, JE_SYN, JE_SYN, NS_FRAC, JE_SYN, NS_OK, JE_BAD_CHR }, +/*ZE1*/ { JE_SYN, JE_SYN, JE_SYN, JE_SYN, NS_FRAC, JE_SYN, JE_SYN, JE_BAD_CHR }, +/*INT*/ { JE_SYN, JE_SYN, NS_INT, NS_INT, NS_FRAC, NS_EX, NS_OK, JE_BAD_CHR }, +/*FRAC*/ { JE_SYN, JE_SYN, NS_FRAC, NS_FRAC,JE_SYN, NS_EX, NS_OK, JE_BAD_CHR }, +/*EX*/ { NS_EX1, NS_EX1, NS_EX1, NS_EX1, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR }, +/*EX1*/ { JE_SYN, JE_SYN, NS_EX1, NS_EX1, JE_SYN, JE_SYN, JE_SYN, JE_BAD_CHR } +}; + + +static int skip_num_constant(json_engine_t *j) +{ + int state= json_num_states[NS_GO][json_num_chr_map[j->s.c_next]]; + int c_len; + + for (;;) + { + if ((c_len= json_next_char(&j->s)) > 0) + { + if ((state= json_num_states[state][json_num_chr_map[j->s.c_next]]) > 0) + { + j->s.c_str+= c_len; + continue; + } + break; + } + + if ((j->s.error= + json_eos(&j->s) ? json_num_states[state][N_END] : JE_BAD_CHR) < 0) + return 1; + else + break; + } + + j->state= *j->stack_p; + return 0; +} + + +/* Scalar numeric. */ +static int v_number(json_engine_t *j) +{ + return skip_num_constant(j) || json_scan_next(j); +} + + +/* Read numeric constant. */ +static int read_num(json_engine_t *j) +{ + j->value= j->value_begin; + if (skip_num_constant(j) == 0) + { + j->value_type= JSON_VALUE_NUMBER; + j->value_len= j->s.c_str - j->value_begin; + return 0; + } + return 1; +} + + +/* Check that the JSON string matches the argument and skip it. 
*/ +static int skip_string_verbatim(json_string_t *s, const char *str) +{ + int c_len; + while (*str) + { + if ((c_len= json_next_char(s)) > 0) + { + if (s->c_next == (my_wc_t) *(str++)) + { + s->c_str+= c_len; + continue; + } + return JE_SYN; + } + return json_eos(s) ? JE_EOS : JE_BAD_CHR; + } + + return 0; +} + + +/* Scalar false. */ +static int v_false(json_engine_t *j) +{ + if (skip_string_verbatim(&j->s, "alse")) + return 1; + j->state= *j->stack_p; + return json_scan_next(j); +} + + +/* Scalar null. */ +static int v_null(json_engine_t *j) +{ + if (skip_string_verbatim(&j->s, "ull")) + return 1; + j->state= *j->stack_p; + return json_scan_next(j); +} + + +/* Scalar true. */ +static int v_true(json_engine_t *j) +{ + if (skip_string_verbatim(&j->s, "rue")) + return 1; + j->state= *j->stack_p; + return json_scan_next(j); +} + + +/* Read false. */ +static int read_false(json_engine_t *j) +{ + j->value_type= JSON_VALUE_FALSE; + j->value= j->value_begin; + j->state= *j->stack_p; + j->value_len= 5; + return skip_string_verbatim(&j->s, "alse"); +} + + +/* Read null. */ +static int read_null(json_engine_t *j) +{ + j->value_type= JSON_VALUE_NULL; + j->value= j->value_begin; + j->state= *j->stack_p; + j->value_len= 4; + return skip_string_verbatim(&j->s, "ull"); +} + + +/* Read true. */ +static int read_true(json_engine_t *j) +{ + j->value_type= JSON_VALUE_TRUE; + j->value= j->value_begin; + j->state= *j->stack_p; + j->value_len= 4; + return skip_string_verbatim(&j->s, "rue"); +} + + +/* Disallowed character. */ +static int not_json_chr(json_engine_t *j) +{ + j->s.error= JE_NOT_JSON_CHR; + return 1; +} + + +/* Bad character. */ +static int bad_chr(json_engine_t *j) +{ + j->s.error= JE_BAD_CHR; + return 1; +} + + +/* Correct finish. */ +static int done(json_engine_t *j __attribute__((unused))) +{ + return 1; +} + + +/* End of the object. */ +static int end_object(json_engine_t *j) +{ + j->stack_p--; + j->state= JST_OBJ_END; + return 0; +} + + +/* End of the array. 
*/ +static int end_array(json_engine_t *j) +{ + j->stack_p--; + j->state= JST_ARRAY_END; + return 0; +} + + +/* Start reading key name. */ +static int read_keyname(json_engine_t *j) +{ + j->state= JST_KEY; + return 0; +} + + +static void get_first_nonspace(json_string_t *js, int *t_next, int *c_len) +{ + do + { + if ((*c_len= json_next_char(js)) <= 0) + *t_next= json_eos(js) ? C_EOS : C_BAD; + else + { + *t_next= (js->c_next < 128) ? json_chr_map[js->c_next] : C_ETC; + js->c_str+= *c_len; + } + } while (*t_next == C_SPACE); +} + + +/* Next key name. */ +static int next_key(json_engine_t *j) +{ + int t_next, c_len; + get_first_nonspace(&j->s, &t_next, &c_len); + + if (t_next == C_QUOTE) + { + j->state= JST_KEY; + return 0; + } + + j->s.error= (t_next == C_EOS) ? JE_EOS : + ((t_next == C_BAD) ? JE_BAD_CHR : + JE_SYN); + return 1; +} + + +/* Forward declarations. */ +static int skip_colon(json_engine_t *j); +static int skip_key(json_engine_t *j); +static int struct_end_cb(json_engine_t *j); +static int struct_end_qb(json_engine_t *j); +static int struct_end_cm(json_engine_t *j); +static int struct_end_eos(json_engine_t *j); + + +static int next_item(json_engine_t *j) +{ + j->state= JST_VALUE; + return 0; +} + + +static int array_item(json_engine_t *j) +{ + j->state= JST_VALUE; + j->s.c_str-= j->sav_c_len; + return 0; +} + + +static json_state_handler json_actions[NR_JSON_STATES][NR_C_CLASSES]= +/* + EOS { } [ ] + : , " -0..9 f + n t ETC ERR BAD +*/ +{ + {/*VALUE*/ + unexpected_eos, mark_object, syntax_error, mark_array, syntax_error, + syntax_error, syntax_error,v_string, v_number, v_false, + v_null, v_true, syntax_error, not_json_chr, bad_chr}, + {/*KEY*/ + unexpected_eos, skip_key, skip_key, skip_key, skip_key, + skip_key, skip_key, skip_colon, skip_key, skip_key, + skip_key, skip_key, skip_key, not_json_chr, bad_chr}, + {/*OBJ_START*/ + unexpected_eos, syntax_error, end_object, syntax_error, syntax_error, + syntax_error, syntax_error, read_keyname, syntax_error, 
syntax_error, + syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr}, + {/*OBJ_END*/ + struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb, + syntax_error, struct_end_cm,syntax_error, syntax_error, syntax_error, + syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr}, + {/*ARRAY_START*/ + unexpected_eos, array_item, syntax_error, array_item, end_array, + syntax_error, syntax_error, array_item, array_item, array_item, + array_item, array_item, syntax_error, not_json_chr, bad_chr}, + {/*ARRAY_END*/ + struct_end_eos, syntax_error, struct_end_cb, syntax_error, struct_end_qb, + syntax_error, struct_end_cm, syntax_error, syntax_error, syntax_error, + syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr}, + {/*DONE*/ + done, syntax_error, syntax_error, syntax_error, syntax_error, + syntax_error, syntax_error, syntax_error, syntax_error, syntax_error, + syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr}, + {/*OBJ_CONT*/ + unexpected_eos, syntax_error, end_object, syntax_error, end_array, + syntax_error, next_key, syntax_error, syntax_error, syntax_error, + syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr}, + {/*ARRAY_CONT*/ + unexpected_eos, syntax_error, syntax_error, syntax_error, end_array, + syntax_error, next_item, syntax_error, syntax_error, syntax_error, + syntax_error, syntax_error, syntax_error, not_json_chr, bad_chr}, + {/*READ_VALUE*/ + unexpected_eos, read_obj, syntax_error, read_array, syntax_error, + syntax_error, syntax_error, read_strn, read_num, read_false, + read_null, read_true, syntax_error, not_json_chr, bad_chr}, +}; + + + +int json_scan_start(json_engine_t *je, + CHARSET_INFO *i_cs, const uchar *str, const uchar *end) +{ + json_string_setup(&je->s, i_cs, str, end); + je->stack[0]= JST_DONE; + je->stack_p= je->stack; + je->state= JST_VALUE; + return 0; +} + + +/* Skip colon and the value. 
*/ +static int skip_colon(json_engine_t *j) +{ + int t_next, c_len; + + get_first_nonspace(&j->s, &t_next, &c_len); + + if (t_next == C_COLON) + { + get_first_nonspace(&j->s, &t_next, &c_len); + return json_actions[JST_VALUE][t_next](j); + } + + j->s.error= (t_next == C_EOS) ? JE_EOS : + ((t_next == C_BAD) ? JE_BAD_CHR: + JE_SYN); + + return 1; +} + + +/* Skip colon and the value. */ +static int skip_key(json_engine_t *j) +{ + int t_next, c_len; + while (json_read_keyname_chr(j) == 0) {} + + if (j->s.error) + return 1; + + get_first_nonspace(&j->s, &t_next, &c_len); + return json_actions[JST_VALUE][t_next](j); +} + + +/* + Handle EOS after the end of an object or array. + To do that we should pop the stack to see if + we are inside an object, or an array, and + run our 'state machine' accordingly. +*/ +static int struct_end_eos(json_engine_t *j) +{ return json_actions[*j->stack_p][C_EOS](j); } + + +/* + Handle '}' after the end of an object or array. + To do that we should pop the stack to see if + we are inside an object, or an array, and + run our 'state machine' accordingly. +*/ +static int struct_end_cb(json_engine_t *j) +{ return json_actions[*j->stack_p][C_RCURB](j); } + + +/* + Handle ']' after the end of an object or array. + To do that we should pop the stack to see if + we are inside an object, or an array, and + run our 'state machine' accordingly. +*/ +static int struct_end_qb(json_engine_t *j) +{ return json_actions[*j->stack_p][C_RSQRB](j); } + + +/* + Handle ',' after the end of an object or array. + To do that we should pop the stack to see if + we are inside an object, or an array, and + run our 'state machine' accordingly. 
+*/ +static int struct_end_cm(json_engine_t *j) +{ return json_actions[*j->stack_p][C_COMMA](j); } + + +int json_read_keyname_chr(json_engine_t *j) +{ + int c_len, t; + + if ((c_len= json_next_char(&j->s)) > 0) + { + j->s.c_str+= c_len; + if (j->s.c_next>= 128 || (t= json_instr_chr_map[j->s.c_next]) <= S_ETC) + return 0; + + switch (t) + { + case S_QUOTE: + for (;;) /* Skip spaces until ':'. */ + { + if ((c_len= json_next_char(&j->s) > 0)) + { + if (j->s.c_next == ':') + { + j->s.c_str+= c_len; + j->state= JST_VALUE; + return 1; + } + + if (j->s.c_next < 128 && json_chr_map[j->s.c_next] == C_SPACE) + { + j->s.c_str+= c_len; + continue; + } + j->s.error= JE_SYN; + break; + } + j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR; + break; + } + return 1; + case S_BKSL: + return json_handle_esc(&j->s); + case S_ERR: + j->s.c_str-= c_len; + j->s.error= JE_STRING_CONST; + return 1; + } + } + j->s.error= json_eos(&j->s) ? JE_EOS : JE_BAD_CHR; + return 1; +} + + +int json_read_value(json_engine_t *j) +{ + int t_next, c_len, res; + + if (j->state == JST_KEY) + { + while (json_read_keyname_chr(j) == 0) {} + + if (j->s.error) + return 1; + } + + get_first_nonspace(&j->s, &t_next, &c_len); + + j->value_begin= j->s.c_str-c_len; + res= json_actions[JST_READ_VALUE][t_next](j); + j->value_end= j->s.c_str; + return res; +} + + +int json_scan_next(json_engine_t *j) +{ + int t_next; + + get_first_nonspace(&j->s, &t_next, &j->sav_c_len); + return json_actions[j->state][t_next](j); +} + + +enum json_path_chr_classes { + P_EOS, /* end of string */ + P_USD, /* $ */ + P_ASTER, /* * */ + P_LSQRB, /* [ */ + P_RSQRB, /* ] */ + P_POINT, /* . 
*/ + P_ZERO, /* 0 */ + P_DIGIT, /* 123456789 */ + P_L, /* l (for "lax") */ + P_S, /* s (for "strict") */ + P_SPACE, /* space */ + P_BKSL, /* \ */ + P_ETC, /* everything else */ + P_ERR, /* character disallowed in JSON*/ + P_BAD, /* invalid character */ + N_PATH_CLASSES, +}; + + +static enum json_path_chr_classes json_path_chr_map[128] = { + P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, + P_ERR, P_SPACE, P_SPACE, P_ERR, P_ERR, P_SPACE, P_ERR, P_ERR, + P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, + P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, + + P_SPACE, P_ETC, P_ETC, P_ETC, P_USD, P_ETC, P_ETC, P_ETC, + P_ETC, P_ETC, P_ASTER, P_ETC, P_ETC, P_ETC, P_POINT, P_ETC, + P_ZERO, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, + P_DIGIT, P_DIGIT, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, + + P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, + P_ETC, P_ETC, P_ETC, P_ETC, P_L, P_ETC, P_ETC, P_ETC, + P_ETC, P_ETC, P_S, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, + P_ETC, P_ETC, P_ETC, P_LSQRB, P_BKSL, P_RSQRB, P_ETC, P_ETC, + + P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, + P_ETC, P_ETC, P_ETC, P_ETC, P_L, P_ETC, P_ETC, P_ETC, + P_ETC, P_ETC, P_S, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, + P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC +}; + + +enum json_path_states { + PS_GO, /* Initial state. */ + PS_LAX, /* Parse the 'lax' keyword. */ + PS_PT, /* New path's step begins. */ + PS_AR, /* Parse array step. */ + PS_AWD, /* Array wildcard. */ + PS_Z, /* '0' (as an array item number). */ + PS_INT, /* Parse integer (as an array item number). */ + PS_AS, /* Space. */ + PS_KEY, /* Key. */ + PS_KNM, /* Parse key name. */ + PS_KWD, /* Key wildcard. */ + N_PATH_STATES, /* Below are states that aren't in the transitions table. */ + PS_SCT, /* Parse the 'strict' keyword. */ + PS_EKY, /* '.' after the keyname so next step is the key. */ + PS_EAR, /* '[' after the keyname so next step is the array. 
*/ + PS_ESC, /* Escaping in the keyname. */ + PS_OK, /* Path normally ended. */ + PS_KOK /* EOS after the keyname so end the path normally. */ +}; + + +static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]= +{ +/* + EOS $, * [ ] . 0 + 1..9 L S SPACE \ ETC ERR + BAD +*/ +/* GO */ { JE_EOS, PS_PT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, + JE_SYN, PS_LAX, PS_SCT, PS_GO, JE_SYN, JE_SYN, JE_NOT_JSON_CHR, + JE_BAD_CHR}, +/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, + JE_SYN, PS_LAX, JE_SYN, PS_GO, JE_SYN, JE_SYN, JE_NOT_JSON_CHR, + JE_BAD_CHR}, +/* PT */ { PS_OK, JE_SYN, JE_SYN, PS_AR, JE_SYN, PS_KEY, JE_SYN, JE_SYN, + JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_NOT_JSON_CHR, + JE_BAD_CHR}, +/* AR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT, JE_SYN, PS_Z, + PS_INT, JE_SYN, JE_SYN, PS_AR, JE_SYN, JE_SYN, JE_NOT_JSON_CHR, + JE_BAD_CHR}, +/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN, + JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_NOT_JSON_CHR, + JE_BAD_CHR}, +/* Z */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN, + JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_NOT_JSON_CHR, + JE_BAD_CHR}, +/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_INT, + PS_INT, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_NOT_JSON_CHR, + JE_BAD_CHR}, +/* AS */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN, JE_SYN, + JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_NOT_JSON_CHR, + JE_BAD_CHR}, +/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, JE_SYN, PS_KNM, + PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KNM, JE_NOT_JSON_CHR, + JE_BAD_CHR}, +/* KNM */ { PS_KOK, PS_KNM, PS_KNM, PS_EAR, PS_KNM, PS_EKY, PS_KNM, + PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, JE_NOT_JSON_CHR, + JE_BAD_CHR}, +/* KWD */ { PS_OK, JE_SYN, JE_SYN, PS_AR, JE_SYN, PS_EKY, JE_SYN, + JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_NOT_JSON_CHR, + JE_BAD_CHR} +}; + + +int json_path_setup(json_path_t *p, + CHARSET_INFO *i_cs, const uchar *str, 
const uchar *end) +{ + int c_len, t_next, state= PS_GO; + + json_string_setup(&p->s, i_cs, str, end); + + p->steps[0].type= JSON_PATH_ARRAY; + p->steps[0].wild= 1; + p->last_step= p->steps; + p->mode_strict= FALSE; + + do + { + if ((c_len= json_next_char(&p->s)) <= 0) + t_next= json_eos(&p->s) ? P_EOS : P_BAD; + else + t_next= (p->s.c_next >= 128) ? P_ETC : json_path_chr_map[p->s.c_next]; + + if ((state= json_path_transitions[state][t_next]) < 0) + return p->s.error= state; + + p->s.c_str+= c_len; + + switch (state) + { + case PS_LAX: + if ((p->s.error= skip_string_verbatim(&p->s, "ax"))) + return 1; + p->mode_strict= FALSE; + continue; + case PS_SCT: + if ((p->s.error= skip_string_verbatim(&p->s, "rict"))) + return 1; + p->mode_strict= TRUE; + state= PS_LAX; + continue; + case PS_AWD: + p->last_step->wild= 1; + continue; + case PS_INT: + p->last_step->n_item*= 10; + p->last_step->n_item+= p->s.c_next - '0'; + continue; + case PS_EKY: + p->last_step->key_end= p->s.c_str - c_len; + state= PS_KEY; + /* Note no 'continue' here. */ + case PS_KEY: + p->last_step++; + p->last_step->type= JSON_PATH_KEY; + p->last_step->wild= 0; + p->last_step->key= p->s.c_str; + continue; + case PS_EAR: + p->last_step->key_end= p->s.c_str - c_len; + state= PS_AR; + /* Note no 'continue' here. 
*/ + case PS_AR: + p->last_step++; + p->last_step->type= JSON_PATH_ARRAY; + p->last_step->wild= 0; + p->last_step->n_item= 0; + continue; + case PS_KWD: + p->last_step->wild= 1; + continue; + case PS_ESC: + if (json_handle_esc(&p->s)) + return 1; + continue; + case PS_KOK: + p->last_step->key_end= p->s.c_str - c_len; + state= PS_OK; + break; + }; + } while (state != PS_OK); + + return 0; +} + + +int json_skip_level(json_engine_t *j) +{ + int ct= 0; + + while (json_scan_next(j) == 0) + { + switch (j->state) { + case JST_OBJ_START: + case JST_ARRAY_START: + ct++; + break; + case JST_OBJ_END: + case JST_ARRAY_END: + if (ct == 0) + return 0; + ct--; + break; + } + } + + return 1; +} + + +int json_skip_key(json_engine_t *j) +{ + if (json_read_value(j)) + return 1; + + if (json_value_scalar(j)) + return 0; + + return json_skip_level(j); +} + + +/* + Current step of the patch matches the JSON construction. + Now we should either stop the search or go to the next + step of the path. +*/ +static int handle_match(json_engine_t *je, json_path_t *p, + json_path_step_t **p_cur_step, uint *array_counters) +{ + DBUG_ASSERT(*p_cur_step < p->last_step); + + if (json_read_value(je)) + return 1; + + if (json_value_scalar(je)) + return 0; + + (*p_cur_step)++; + array_counters[*p_cur_step - p->steps]= 0; + + if ((int) je->value_type != (int) (*p_cur_step)->type) + { + (*p_cur_step)--; + return json_skip_level(je); + } + + return 0; +} + + +/* + Check if the name of the current JSON key matches + the step of the path. 
+*/ +static int json_key_matches(json_engine_t *je, json_string_t *k) +{ + while (json_read_keyname_chr(je) == 0) + { + if (json_read_string_const_chr(k) || + je->s.c_next != k->c_next) + return 0; + } + + if (json_read_string_const_chr(k)) + return 1; + + return 0; +} + + +int json_find_path(json_engine_t *je, + json_path_t *p, json_path_step_t **p_cur_step, + uint *array_counters) +{ + json_string_t key_name; + + json_string_set_cs(&key_name, p->s.cs); + + do + { + json_path_step_t *cur_step= *p_cur_step; + switch (je->state) + { + case JST_KEY: + DBUG_ASSERT(cur_step->type == JSON_PATH_KEY); + if (!cur_step->wild) + { + json_string_set_str(&key_name, cur_step->key, cur_step->key_end); + if (!json_key_matches(je, &key_name)) + { + if (json_skip_key(je)) + goto exit; + continue; + } + } + if (cur_step == p->last_step || + handle_match(je, p, p_cur_step, array_counters)) + goto exit; + break; + case JST_VALUE: + DBUG_ASSERT(cur_step->type == JSON_PATH_ARRAY); + if (cur_step->wild || + cur_step->n_item == array_counters[cur_step - p->steps]) + { + /* Array item matches. */ + if (cur_step == p->last_step || + handle_match(je, p, p_cur_step, array_counters)) + goto exit; + } + else + { + json_skip_array_item(je); + array_counters[cur_step - p->steps]++; + } + break; + case JST_OBJ_END: + case JST_ARRAY_END: + (*p_cur_step)--; + break; + default: + DBUG_ASSERT(0); + break; + }; + } while (json_scan_next(je) == 0); + + /* No luck. 
*/ + return 1; + +exit: + return je->s.error; +} + + +int json_find_paths_first(json_engine_t *je, json_find_paths_t *state, + uint n_paths, json_path_t *paths, uint *path_depths) +{ + state->n_paths= n_paths; + state->paths= paths; + state->cur_depth= 0; + state->path_depths= path_depths; + return json_find_paths_next(je, state); +} + + +int json_find_paths_next(json_engine_t *je, json_find_paths_t *state) +{ + uint p_c; + int path_found, no_match_found; + do + { + switch (je->state) + { + case JST_KEY: + path_found= FALSE; + no_match_found= TRUE; + for (p_c=0; p_c < state->n_paths; p_c++) + { + json_path_step_t *cur_step; + if (state->path_depths[p_c] < + state->cur_depth /* Path already failed. */ || + (cur_step= state->paths[p_c].steps + state->cur_depth)->type != + JSON_PATH_KEY) + continue; + + if (!cur_step->wild) + { + json_string_t key_name; + json_string_setup(&key_name, state->paths[p_c].s.cs, + cur_step->key, cur_step->key_end); + if (!json_key_matches(je, &key_name)) + continue; + } + if (cur_step - state->paths[p_c].last_step == state->cur_depth) + path_found= TRUE; + else + { + no_match_found= FALSE; + state->path_depths[p_c]= state->cur_depth + 1; + } + } + if (path_found) + /* Return the result. */ + goto exit; + if (no_match_found) + { + /* No possible paths left to check. Just skip the level. */ + if (json_skip_level(je)) + goto exit; + } + + break; + case JST_VALUE: + path_found= FALSE; + no_match_found= TRUE; + for (p_c=0; p_c < state->n_paths; p_c++) + { + json_path_step_t *cur_step; + if (state->path_depths[p_c]< state->cur_depth /* Path already failed. */ || + (cur_step= state->paths[p_c].steps + state->cur_depth)->type != + JSON_PATH_ARRAY) + continue; + if (cur_step->wild || + cur_step->n_item == state->array_counters[state->cur_depth]) + { + /* Array item matches. 
*/ + if (cur_step - state->paths[p_c].last_step == state->cur_depth) + path_found= TRUE; + else + { + no_match_found= FALSE; + state->path_depths[p_c]= state->cur_depth + 1; + } + } + } + + if (path_found) + goto exit; + + if (no_match_found) + json_skip_array_item(je); + + state->array_counters[state->cur_depth]++; + break; + case JST_OBJ_START: + case JST_ARRAY_START: + for (p_c=0; p_c < state->n_paths; p_c++) + { + if (state->path_depths[p_c] < state->cur_depth) + /* Path already failed. */ + continue; + if (state->paths[p_c].steps[state->cur_depth].type == + (je->state == JST_OBJ_START) ? JSON_PATH_KEY : JSON_PATH_ARRAY) + state->path_depths[p_c]++; + } + state->cur_depth++; + break; + case JST_OBJ_END: + case JST_ARRAY_END: + for (p_c=0; p_c < state->n_paths; p_c++) + { + if (state->path_depths[p_c] < state->cur_depth) + continue; + state->path_depths[p_c]--; + } + state->cur_depth--; + break; + default: + DBUG_ASSERT(0); + break; + }; + } while (json_scan_next(je) == 0); + + /* No luck. */ + return 1; + +exit: + return je->s.error; +} + + +int json_append_ascii(CHARSET_INFO *json_cs, + uchar *json, uchar *json_end, + const uchar *ascii, const uchar *ascii_end) +{ + const uchar *json_start= json; + while (ascii < ascii_end) + { + int c_len; + if ((c_len= json_cs->cset->wc_mb(json_cs, (my_wc_t) *ascii, + json, json_end)) > 0) + { + json+= c_len; + ascii++; + continue; + } + + /* Error return. */ + return c_len; + } + + return json - json_start; +} + + +int json_unescape(CHARSET_INFO *json_cs, + const uchar *json_str, const uchar *json_end, + CHARSET_INFO *res_cs, uchar *res, uchar *res_end) +{ + json_string_t s; + json_string_setup(&s, json_cs, json_str, json_end); + while (json_read_string_const_chr(&s) == 0) + { + int c_len; + if ((c_len= res_cs->cset->wc_mb(res_cs, s.c_next, res, res_end)) > 0) + { + res+= c_len; + continue; + } + if (c_len == MY_CS_ILUNI) + { + /* + Result charset doesn't support the json's character. + Let's replace it with the '?' symbol. 
+ */ + if ((c_len= res_cs->cset->wc_mb(res_cs, '?', res, res_end)) > 0) + { + res+= c_len; + continue; + } + } + /* Result buffer is too small. */ + return -1; + } + + return s.error ? 1 : 0; +} + + +/* When we need to replace a character with the escaping. */ +enum json_esc_char_classes { + ESC_= 0, /* No need to escape. */ + ESC_U= 'u', /* Character not allowed in JSON. Always escape as \uXXXX. */ + ESC_B= 'b', /* Backspace. Escape as \b */ + ESC_F= 'f', /* Formfeed. Escape as \f */ + ESC_N= 'n', /* Newline. Escape as \n */ + ESC_R= 'r', /* Return. Escape as \r */ + ESC_T= 't', /* Tab. Escape as \s */ + ESC_BS= '\\' /* Backslash or '"'. Escape by the \\ prefix. */ +}; + + +/* This specifies how we should escape the character. */ +static enum json_esc_char_classes json_escape_chr_map[0x60] = { + ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, + ESC_B, ESC_T, ESC_N, ESC_U, ESC_F, ESC_R, ESC_U, ESC_U, + ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, + ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, ESC_U, + + ESC_, ESC_, ESC_BS, ESC_, ESC_, ESC_, ESC_, ESC_, + ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, + ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, + ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, + + ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, + ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, + ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, ESC_, + ESC_, ESC_, ESC_, ESC_, ESC_BS, ESC_, ESC_, ESC_, +}; + + +static const char hexconv[16] = "0123456789ABCDEF"; + + +int json_escape(CHARSET_INFO *str_cs, + const uchar *str, const uchar *str_end, + CHARSET_INFO *json_cs, uchar *json, uchar *json_end) +{ + const uchar *json_start= json; + + while (str < str_end) + { + my_wc_t c_chr; + int c_len; + if ((c_len= str_cs->cset->mb_wc(str_cs, &c_chr, str, str_end)) > 0) + { + enum json_esc_char_classes c_class; + + str+= c_len; + if (c_chr > 0x60 || (c_class= json_escape_chr_map[c_chr]) == ESC_) + { + if ((c_len= json_cs->cset->wc_mb(json_cs, c_chr, 
json, json_end)) > 0) + { + json+= c_len; + continue; + } + if (c_len < 0) + { + /* JSON buffer is depleted. */ + return -1; + } + + /* JSON charset cannot convert this character. */ + c_class= ESC_U; + } + + if ((c_len= json_cs->cset->wc_mb(json_cs, '\\', json, json_end)) <= 0 || + (c_len= json_cs->cset->wc_mb(json_cs, + (c_class == ESC_BS) ? c_chr : c_class, + json+= c_len, json_end)) <= 0) + { + /* JSON buffer is depleted. */ + return -1; + } + json+= c_len; + + if (c_class != ESC_U) + continue; + + { + /* We have to use /uXXXX escaping. */ + uchar utf16buf[4]; + uchar code_str[8]; + int u_len= my_uni_utf16(0, c_chr, utf16buf, utf16buf + 4); + + code_str[0]= hexconv[utf16buf[0] >> 4]; + code_str[1]= hexconv[utf16buf[0] & 15]; + code_str[2]= hexconv[utf16buf[1] >> 4]; + code_str[3]= hexconv[utf16buf[1] & 15]; + + if (u_len > 2) + { + code_str[4]= hexconv[utf16buf[2] >> 4]; + code_str[5]= hexconv[utf16buf[2] & 15]; + code_str[6]= hexconv[utf16buf[3] >> 4]; + code_str[7]= hexconv[utf16buf[3] & 15]; + } + + if ((c_len= json_append_ascii(json_cs, json, json_end, + code_str, code_str+u_len*2)) > 0) + { + json+= c_len; + continue; + } + /* JSON buffer is depleted. */ + return -1; + } + } + } + + return json - json_start; +} diff --git a/unittest/json_lib/CMakeLists.txt b/unittest/json_lib/CMakeLists.txt new file mode 100644 index 00000000000..2d04025e93e --- /dev/null +++ b/unittest/json_lib/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright (c) 2006, 2013, Oracle and/or its affiliates. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/sql + ${CMAKE_SOURCE_DIR}/regex + ${CMAKE_SOURCE_DIR}/extra/yassl/include + ${CMAKE_SOURCE_DIR}/unittest/mytap) + +# +MY_ADD_TESTS(json_lib LINK_LIBRARIES strings dbug) diff --git a/unittest/json_lib/json_lib-t.c b/unittest/json_lib/json_lib-t.c new file mode 100644 index 00000000000..49c8f7e34c2 --- /dev/null +++ b/unittest/json_lib/json_lib-t.c @@ -0,0 +1,186 @@ +/* Copyright (c) 2016, MariaDB Corp. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "my_config.h" +#include "config.h" +#include <tap.h> +#include <my_global.h> +#include <my_sys.h> +#include <json_lib.h> + +/* The character set used for JSON all over this test. 
*/ +static CHARSET_INFO *ci; + +#define s_e(j) j, j + strlen((const char *) j) + + +struct st_parse_result +{ + int n_keys; + int n_values; + int n_arrays; + int n_objects; + int n_steps; + int error; + uchar keyname_csum; +}; + + +static void parse_json(const uchar *j, struct st_parse_result *result) +{ + json_engine_t je; + + bzero(result, sizeof(*result)); + + if (json_scan_start(&je, ci, s_e(j))) + return; + + do + { + result->n_steps++; + switch (je.state) + { + case JST_KEY: + result->n_keys++; + while (json_read_keyname_chr(&je) == 0) + { + result->keyname_csum^= je.s.c_next; + } + if (je.s.error) + return; + break; + case JST_VALUE: + result->n_values++; + break; + case JST_OBJ_START: + result->n_objects++; + break; + case JST_ARRAY_START: + result->n_arrays++; + break; + default: + break; + }; + } while (json_scan_next(&je) == 0); + + result->error= je.s.error; +} + + +static const uchar *js0= (const uchar *) "123"; +static const uchar *js1= (const uchar *) "[123, \"text\"]"; +static const uchar *js2= (const uchar *) "{\"key1\":123, \"key2\":\"text\"}"; +static const uchar *js3= (const uchar *) "{\"key1\":{\"ikey1\":321}," + "\"key2\":[\"text\", 321]}"; + +/* + Test json_lib functions to parse JSON. +*/ +static void +test_json_parsing() +{ + struct st_parse_result r; + parse_json(js0, &r); + ok(r.n_steps == 1 && r.n_values == 1, "simple value"); + parse_json(js1, &r); + ok(r.n_steps == 5 && r.n_values == 3 && r.n_arrays == 1, "array"); + parse_json(js2, &r); + ok(r.n_steps == 5 && r.n_keys == 2 && r.n_objects == 1 && r.keyname_csum == 3, + "object"); + parse_json(js3, &r); + ok(r.n_steps == 12 && r.n_keys == 3 && r.n_objects == 2 && + r.n_arrays == 1 && r.keyname_csum == 44, + "complex json"); +} + + +static const uchar *p0= (const uchar *) "$.key1[12].*[*]"; +/* + Test json_lib functions to parse JSON path. 
+*/ +static void +test_path_parsing() +{ + json_path_t p; + if (json_path_setup(&p, ci, s_e(p0))) + return; + ok(p.last_step - p.steps == 4 && + p.steps[0].type == JSON_PATH_ARRAY && p.steps[0].wild == 1 && + p.steps[1].type == JSON_PATH_KEY && p.steps[1].wild == 0 && + p.steps[2].type == JSON_PATH_ARRAY && p.steps[2].n_item == 12 && + p.steps[3].type == JSON_PATH_KEY && p.steps[3].wild == 1 && + p.steps[4].type == JSON_PATH_ARRAY && p.steps[4].wild == 1, + "path"); +} + + +static const uchar *fj0=(const uchar *) "[{\"k0\":123, \"k1\":123, \"k1\":123}," + " {\"k3\":321, \"k4\":\"text\"}," + " {\"k1\":[\"text\"], \"k2\":123}]"; +static const uchar *fp0= (const uchar *) "$[*].k1"; +/* + Test json_lib functions to search through JSON. +*/ +static void +test_search() +{ + json_engine_t je; + json_path_t p; + json_path_step_t *cur_step; + int n_matches, scal_values; + uint array_counters[JSON_DEPTH_LIMIT]; + + if (json_scan_start(&je, ci, s_e(fj0)) || + json_path_setup(&p, ci, s_e(fp0))) + return; + + cur_step= p.steps; + n_matches= scal_values= 0; + while (json_find_path(&je, &p, &cur_step, array_counters) == 0) + { + n_matches++; + if (json_read_value(&je)) + return; + if (json_value_scalar(&je)) + { + scal_values++; + if (json_scan_next(&je)) + return; + } + else + { + if (json_skip_level(&je) || json_scan_next(&je)) + return; + } + + } + + ok(n_matches == 3, "search"); +} + + +int main() +{ + ci= &my_charset_utf8_general_ci; + + plan(6); + diag("Testing json_lib functions."); + + test_json_parsing(); + test_path_parsing(); + test_search(); + + return exit_status(); +} |