diff options
-rw-r--r-- | mysql-test/main/statistics_json.result | 35 | ||||
-rw-r--r-- | mysql-test/main/statistics_json.test | 13 | ||||
-rw-r--r-- | sql/share/errmsg-utf8.txt | 2 | ||||
-rw-r--r-- | sql/sql_statistics.cc | 95 |
4 files changed, 77 insertions, 68 deletions
diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result index 8f601422557..c39aeb84e1b 100644 --- a/mysql-test/main/statistics_json.result +++ b/mysql-test/main/statistics_json.result @@ -67,6 +67,41 @@ test t1 d 1 25 0.0000 8.0000 0.0000 10 JSON [ "21", "23" ] +SELECT * FROM t1; +a b c d +1 1 1 1 +2 2 2 2 +3 3 3 3 +4 4 4 4 +5 5 5 5 +6 6 6 6 +7 7 7 7 +8 8 8 8 +9 9 9 9 +10 10 10 10 +11 11 11 11 +12 12 12 12 +13 13 13 13 +14 14 14 14 +15 15 15 15 +16 16 16 16 +17 17 17 17 +18 18 18 18 +19 19 19 19 +20 20 20 20 +21 21 21 21 +22 22 22 22 +23 23 23 23 +24 24 24 24 +25 25 25 25 +UPDATE mysql.column_stats SET histogram='["1", {"a": "b"}, "2"]' WHERE table_name='t1'; +FLUSH TABLES; +SELECT * FROM t1; +ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '1'. +UPDATE mysql.column_stats SET histogram='{}' WHERE table_name='t1'; +FLUSH TABLES; +SELECT * FROM t1; +ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '32608'. DELETE FROM mysql.column_stats; DROP TABLE t1; create schema world; diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test index acc44456d8f..64b3b83e5f7 100644 --- a/mysql-test/main/statistics_json.test +++ b/mysql-test/main/statistics_json.test @@ -28,6 +28,19 @@ set histogram_size=10; ANALYZE TABLE t1 PERSISTENT FOR ALL; SELECT * FROM mysql.column_stats WHERE table_name='t1'; +SELECT * FROM t1; + +# We then test different valid JSON strings that are invalid histograms. +UPDATE mysql.column_stats SET histogram='["1", {"a": "b"}, "2"]' WHERE table_name='t1'; +FLUSH TABLES; +--error ER_JSON_HISTOGRAM_PARSE_FAILED +SELECT * FROM t1; + +UPDATE mysql.column_stats SET histogram='{}' WHERE table_name='t1'; +FLUSH TABLES; +--error ER_JSON_HISTOGRAM_PARSE_FAILED +SELECT * FROM t1; + DELETE FROM mysql.column_stats; DROP TABLE t1; diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 2d90793b90f..bcb3cc88c49 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -8913,3 +8913,5 @@ ER_PARTITION_CONVERT_SUBPARTITIONED eng "Convert partition is not supported for subpartitioned table." ER_PROVIDER_NOT_LOADED eng "MariaDB tried to use the %s, but its provider plugin is not loaded" +ER_JSON_HISTOGRAM_PARSE_FAILED + eng "Failed to parse histogram, encountered JSON_TYPE '%d'." diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 5cc31aa1b71..818d2b8d492 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -67,15 +67,11 @@ * json_get_array_items expects a JSON array as argument, * and pushes the elements of the array into the `container` vector. * It only works if all the elements in the original JSON array - * are scalar values (i.e., strings, numbers, true or false), and returns JSV_BAD_JSON if: - * the original JSON is not an array OR the JSON array contains non-scalar elements. + * are scalar values (i.e., strings, numbers, true or false), + * else, the JSON type encountered is stored in value_type and the function returns false. */ bool json_get_array_items(const char *json, const char *json_end, int *value_type, std::vector<std::string> &container); -std::vector<std::string> parse_histogram_from_json(const char *json); - -void test_parse_histogram_from_json(); - Histogram_base *create_histogram(Histogram_type hist_type); /* Currently there are only 3 persistent statistical tables */ @@ -1221,18 +1217,29 @@ public: of read_stats->histogram. */ - Histogram_binary * load_histogram(MEM_ROOT *mem_root) + Histogram_base * load_histogram(MEM_ROOT *mem_root) { if (find_stat()) { char buff[MAX_FIELD_WIDTH]; String val(buff, sizeof(buff), &my_charset_bin); uint fldno= COLUMN_STAT_HISTOGRAM; + Histogram_base *hist; Field *stat_field= stat_table->field[fldno]; table_field->read_stats->set_not_null(fldno); stat_field->val_str(&val); - // histogram-todo: here, create the histogram of appropriate type. - Histogram_binary *hist= new (mem_root) Histogram_binary(); + switch (table_field->read_stats->histogram_type_on_disk) + { + case SINGLE_PREC_HB: + case DOUBLE_PREC_HB: + hist = new (mem_root) Histogram_binary(); + break; + case JSON: + hist = new (mem_root) Histogram_json(); + break; + default: + return NULL; + } if (!hist->parse(mem_root, table_field->read_stats->histogram_type_on_disk, (const uchar*)val.ptr(), val.length())) { @@ -1283,21 +1290,17 @@ void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htyp bool Histogram_json::parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *ptr, uint size_arg) { + DBUG_ENTER("Histogram_json::parse"); type = type_arg; - // I think we could use memcpy here, but not sure about how to get the right size - // since we can't depend on size_arg (it's zero for json histograms) - // also, does it make sense to cast here? or we can modify json_get_array_items - // to accept uchar* const char *json = (char *)ptr; int vt; bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets); - fprintf(stderr,"==============\n"); - fprintf(stderr,"histogram: %s\n", json); - fprintf(stderr, "json_get_array_items() returned %s\n", result ? "true" : "false"); - fprintf(stderr, "value type after json_get_array_items() is %d\n", vt); - fprintf(stderr, " JSV_BAD_JSON=%d, JSON_VALUE_ARRAY=%d\n", (int)JSV_BAD_JSON, (int)JSON_VALUE_ARRAY); - fprintf(stderr, "hist_buckets.size()=%zu\n", hist_buckets.size()); - return false; + if (!result) + { + my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), vt); + DBUG_RETURN(true); + } + DBUG_RETURN(false); } void Histogram_json::serialize(Field *field) @@ -1753,11 +1756,6 @@ public: histogram->set_size(bucket_bounds.size()); Binary_string *json_string = (Binary_string *) writer->output.get_string(); histogram->set_values((uchar *) json_string->c_ptr()); - - std::vector<std::string> buckets = parse_histogram_from_json(json_string->c_ptr()); - printf("%zu", buckets.size()); - - test_parse_histogram_from_json(); } }; @@ -1770,41 +1768,6 @@ Histogram_base *create_histogram(Histogram_type hist_type) return new Histogram_binary; } -void test_parse_histogram_from_json() -{ - std::vector<std::string> bucket = {}; - std::string json; - std::string tests[7] = { - R"(["aabbb", "ccccdd", "eeefff"])", - R"(["aabbb", "ccc{}dd", "eeefff"])", - R"(["aabbb", {"a": "b"}, "eeefff"])", - R"({})", - R"([1,2,3, null])", - R"([null])", - R"([])" - }; - - for(const auto& test : tests) { - json = test; - bucket = parse_histogram_from_json(json.c_str()); - } -} - -std::vector<std::string> parse_histogram_from_json(const char *json) -{ - std::vector<std::string> hist_buckets= {}; - int vt; - bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets); - fprintf(stderr,"==============\n"); - fprintf(stderr,"histogram: %s\n", json); - fprintf(stderr, "json_get_array_items() returned %s\n", result ? "true" : "false"); - fprintf(stderr, "value type after json_get_array_items() is %d\n", vt); - fprintf(stderr, " JSV_BAD_JSON=%d, JSON_VALUE_ARRAY=%d\n", (int)JSV_BAD_JSON, (int)JSON_VALUE_ARRAY); - fprintf(stderr, "hist_buckets.size()=%zu\n", hist_buckets.size()); - - return hist_buckets; -} - bool json_get_array_items(const char *json, const char *json_end, int *value_type, std::vector<std::string> &container) { json_engine_t je; int vl; @@ -1814,7 +1777,6 @@ bool json_get_array_items(const char *json, const char *json_end, int *value_typ if (json_read_value(&je) || je.value_type != JSON_VALUE_ARRAY) { - *value_type = JSV_BAD_JSON; return false; } *value_type = je.value_type; @@ -1831,16 +1793,15 @@ bool json_get_array_items(const char *json, const char *json_end, int *value_typ je.value_type != JSON_VALUE_TRUE && je.value_type != JSON_VALUE_FALSE) { - *value_type = JSV_BAD_JSON; return false; } val = std::string(v, vl); container.emplace_back(val); + break; case JST_ARRAY_END: break; } } - return true; } @@ -3408,7 +3369,7 @@ int read_histograms_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables) //table_field->read_stats->histogram.set_values(histogram); table_field->read_stats->histogram_= - column_stat.load_histogram(&stats_cb->mem_root); + column_stat.load_histogram(&stats_cb->mem_root); //histogram+= hist_size; } } @@ -4101,8 +4062,7 @@ double get_column_range_cardinality(Field *field, if (avg_frequency > 1.0 + 0.000001 && col_stats->min_max_values_are_provided()) { - Histogram_binary *hist= - dynamic_cast<Histogram_binary *>(col_stats->histogram_); + Histogram_base *hist = col_stats->histogram_; if (hist && hist->is_usable(thd)) { store_key_image_to_rec(field, (uchar *) min_endp->key, @@ -4146,8 +4106,7 @@ double get_column_range_cardinality(Field *field, else max_mp_pos= 1.0; - Histogram_binary *hist= - dynamic_cast<Histogram_binary *>(col_stats->histogram_); + Histogram_base *hist = col_stats->histogram_; if (hist && hist->is_usable(thd)) sel= hist->range_selectivity(min_mp_pos, max_mp_pos); else |