diff options
author | Sergei Petrunia <psergey@askmonty.org> | 2021-11-26 20:03:08 +0300 |
---|---|---|
committer | Sergei Petrunia <psergey@askmonty.org> | 2021-11-26 20:03:08 +0300 |
commit | bfb9f775525c05fa1b160ed1e63c51ee53eb052c (patch) | |
tree | 1efb9c59be966db121445353fd13892d357633ba | |
parent | 71ae8a809499802e1187c07aba4e07fbf8be1b3b (diff) | |
download | mariadb-git-preview-10.7-MDEV-26519-json-histograms.tar.gz |
MDEV-26886: Estimation for filtered rows less precise with JSON histogrampreview-10.7-MDEV-26519-json-histograms
- Make Histogram_json_hb::range_selectivity handle singleton buckets
specially when computing selectivity of the max. endpoint bound.
(for min. endpoint, we already do that).
- Also, fixed comments for Histogram_json_hb::find_bucket
-rw-r--r-- | mysql-test/main/statistics_json.result | 25 | ||||
-rw-r--r-- | mysql-test/main/statistics_json.test | 14 | ||||
-rw-r--r-- | sql/opt_histogram_json.cc | 49 | ||||
-rw-r--r-- | sql/opt_histogram_json.h | 2 |
4 files changed, 67 insertions, 23 deletions
diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result index 99373c93166..537233ef2c2 100644 --- a/mysql-test/main/statistics_json.result +++ b/mysql-test/main/statistics_json.result @@ -7658,3 +7658,28 @@ test.t1 analyze status OK ALTER TABLE t1 MODIFY f TEXT, ORDER BY pk; INSERT INTO t1 (f) VALUES ('bar'); DROP TABLE t1; +# +# MDEV-26886: Estimation for filtered rows less precise with JSON histogram +# +create table t1 (a tinyint) as select if(seq%3,seq,0) as a from seq_1_to_100; +select count(*) from t1 where a <= 0; +count(*) +33 +set histogram_type = JSON_HB, histogram_size=default; +analyze table t1 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +analyze select * from t1 where a <= 0; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 33.00 33.00 Using where +analyze select * from t1 where a < 0; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.47 0.00 Using where +analyze select * from t1 where a > 0; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 67.00 67.00 Using where +analyze select * from t1 where a >= 0; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 Using where +drop table t1; diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test index c0c88895f05..23e5e7395a7 100644 --- a/mysql-test/main/statistics_json.test +++ b/mysql-test/main/statistics_json.test @@ -340,3 +340,17 @@ ANALYZE TABLE t1 PERSISTENT FOR ALL; ALTER TABLE t1 MODIFY f TEXT, ORDER BY pk; INSERT INTO t1 (f) VALUES ('bar'); DROP TABLE t1; + +--echo # +--echo # MDEV-26886: Estimation for filtered rows less precise with JSON histogram +--echo # +create table t1 (a tinyint) as select if(seq%3,seq,0) as a from seq_1_to_100; +select count(*) from t1 where a <= 0; + +set histogram_type = JSON_HB, histogram_size=default; +analyze table t1 persistent for all; +analyze select * from t1 where a <= 0; +analyze select * from t1 where a < 0; +analyze select * from t1 where a > 0; +analyze select * from t1 where a >= 0; +drop table t1; diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc index 44b058761a4..e1a4efc6b28 100644 --- a/sql/opt_histogram_json.cc +++ b/sql/opt_histogram_json.cc @@ -743,9 +743,22 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp, idx--; } double left_fract= get_left_fract(idx); - double sel= position_in_interval(field, max_key, max_key_len, - buckets[idx].start_value, - get_end_value(idx)); + + double sel; + /* Special handling for singleton buckets */ + if (buckets[idx].ndv == 1 && equal) + { + if (inclusive_endp) + sel= 1.0; + else + sel= 0.0; + } + else + { + sel= position_in_interval(field, max_key, max_key_len, + buckets[idx].start_value, + get_end_value(idx)); + } max= left_fract + sel * (buckets[idx].cum_fract - left_fract); } else @@ -763,26 +776,18 @@ void Histogram_json_hb::serialize(Field *field) /* - Find the rightmost histogram bucket such that "lookup_val $GT start_value". - - $GT is either '>' or '>=' depending on equal_is_less parameter. - - @param equal_is_less Controls what to do if a histogram bound is equal to the - lookup_val. - - @detail - Possible cases: - 1. The regular case: the value falls into some bucket. + @brief + Find the leftmost histogram bucket such that "lookup_val >= start_value". - 2. The value is less than the minimum of the first bucket - 3. The value is greater than the maximum of the last bucket - In these cases we "clip" to the first/last bucket. + @param field Field object (used to do value comparisons) + @param lookup_val The lookup value in KeyTupleFormat. + @param equal OUT TRUE<=> the found bucket has left_bound=lookup_val - 4. The value hits the bucket boundary. Then, we need to know whether the - point of interest is to the left the constant, or to the right of it. + @return + The bucket index */ -int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val, +int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val, bool *equal) { int res; @@ -806,10 +811,10 @@ int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val, } /* - If low and high were assigned a value in the above loop, then they are not - equal to the lookup value: + If low and high were assigned a value in the above loop and we got here, + then they are not equal to the lookup value: - bucket[low] < lookup_val < bucket[high] + bucket[low].start_value < lookup_val < bucket[high].start_value But there are two special cases: low=0 and high=last_bucket. Handle them below. diff --git a/sql/opt_histogram_json.h b/sql/opt_histogram_json.h index 6771942e94e..a5499917971 100644 --- a/sql/opt_histogram_json.h +++ b/sql/opt_histogram_json.h @@ -124,6 +124,6 @@ public: private: double get_left_fract(int idx); std::string& get_end_value(int idx); - int find_bucket(Field *field, const uchar *lookup_val, bool *equal); + int find_bucket(const Field *field, const uchar *lookup_val, bool *equal); }; |