diff options
-rw-r--r-- | mysql-test/main/selectivity.result | 66 | ||||
-rw-r--r-- | mysql-test/main/selectivity.test | 74 | ||||
-rw-r--r-- | mysql-test/main/selectivity_no_engine.result | 6 | ||||
-rw-r--r-- | sql/sql_statistics.cc | 13 |
4 files changed, 151 insertions, 8 deletions
diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result index b5a48d341ae..9836c832ca7 100644 --- a/mysql-test/main/selectivity.result +++ b/mysql-test/main/selectivity.result @@ -1946,4 +1946,70 @@ set histogram_size=@save_histogram_size; set use_stat_tables= @save_use_stat_tables; DROP TABLE t1; # End of 10.2 tests +# +# MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram +# +create table t0(a int); +insert into t0 select 1 from seq_1_to_78; +create table t1(a int); +insert into t1 select 1 from seq_1_to_26; +create table t10 (a int); +insert into t10 select 0 from t0, seq_1_to_4; +insert into t10 select 8693 from t1; +insert into t10 select 8694 from t1; +insert into t10 select 8695 from t1; +insert into t10 select 34783 from t1; +insert into t10 select 34784 from t1; +insert into t10 select 34785 from t1; +insert into t10 select 34785 from t0, seq_1_to_8; +insert into t10 select 65214 from t1; +insert into t10 select 65215 from t1; +insert into t10 select 65216 from t1; +insert into t10 select 65216 from t0, seq_1_to_52; +insert into t10 select 65217 from t1; +insert into t10 select 65218 from t1; +insert into t10 select 65219 from t1; +insert into t10 select 65219 from t0; +insert into t10 select 73913 from t1; +insert into t10 select 73914 from t1; +insert into t10 select 73915 from t1; +insert into t10 select 73915 from t0, seq_1_to_40; +insert into t10 select 78257 from t1; +insert into t10 select 78258 from t1; +insert into t10 select 78259 from t1; +insert into t10 select 91300 from t1; +insert into t10 select 91301 from t1; +insert into t10 select 91302 from t1; +insert into t10 select 91302 from t0, seq_1_to_6; +insert into t10 select 91303 from t1; +insert into t10 select 91304 from t1; +insert into t10 select 91305 from t1; +insert into t10 select 91305 from t0, seq_1_to_8; +insert into t10 select 99998 from t1; +insert into t10 select 99999 from t1; +insert into t10 select 100000 from t1; +analyze table t10 persistent for all; +Table Op Msg_type Msg_text +test.t10 analyze status Engine-independent statistics collected +test.t10 analyze status OK +flush tables; +set optimizer_trace=1; +explain select * from t10 where a in (91303); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t10 ALL NULL NULL NULL NULL 9984 Using where +# Must have selectivity_from_histogram <= 1.0: +select json_detailed(json_extract(trace, '$**.selectivity_for_columns')) +from information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.selectivity_for_columns')) +[ + [ + { + "column_name": "a", + "ranges": + ["91303 <= a <= 91303"], + "selectivity_from_histogram": 0.0078125 + } + ] +] +drop table t0,t1,t10; set @@global.histogram_size=@save_histogram_size; diff --git a/mysql-test/main/selectivity.test b/mysql-test/main/selectivity.test index 4e4513d09d6..e8f265ca51b 100644 --- a/mysql-test/main/selectivity.test +++ b/mysql-test/main/selectivity.test @@ -1329,6 +1329,80 @@ DROP TABLE t1; --echo # End of 10.2 tests +--echo # +--echo # MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram +--echo # +create table t0(a int); # This holds how many rows we hold in a bucket. +insert into t0 select 1 from seq_1_to_78; + +create table t1(a int); # one-third of a bucket +insert into t1 select 1 from seq_1_to_26; + +create table t10 (a int); +insert into t10 select 0 from t0, seq_1_to_4; + +insert into t10 select 8693 from t1; +insert into t10 select 8694 from t1; +insert into t10 select 8695 from t1; + + +insert into t10 select 34783 from t1; +insert into t10 select 34784 from t1; +insert into t10 select 34785 from t1; + + +insert into t10 select 34785 from t0, seq_1_to_8; + +insert into t10 select 65214 from t1; +insert into t10 select 65215 from t1; +insert into t10 select 65216 from t1; + +insert into t10 select 65216 from t0, seq_1_to_52; + +insert into t10 select 65217 from t1; +insert into t10 select 65218 from t1; +insert into t10 select 65219 from t1; + +insert into t10 select 65219 from t0; + + +insert into t10 select 73913 from t1; +insert into t10 select 73914 from t1; +insert into t10 select 73915 from t1; + +insert into t10 select 73915 from t0, seq_1_to_40; + + +insert into t10 select 78257 from t1; +insert into t10 select 78258 from t1; +insert into t10 select 78259 from t1; + +insert into t10 select 91300 from t1; +insert into t10 select 91301 from t1; +insert into t10 select 91302 from t1; + +insert into t10 select 91302 from t0, seq_1_to_6; + +insert into t10 select 91303 from t1; # Only 1/3rd of bucket matches the search tuple +insert into t10 select 91304 from t1; +insert into t10 select 91305 from t1; + +insert into t10 select 91305 from t0, seq_1_to_8; + +insert into t10 select 99998 from t1; +insert into t10 select 99999 from t1; +insert into t10 select 100000 from t1; + +analyze table t10 persistent for all; +flush tables; +set optimizer_trace=1; +explain select * from t10 where a in (91303); +--echo # Must have selectivity_from_histogram <= 1.0: +select json_detailed(json_extract(trace, '$**.selectivity_for_columns')) +from information_schema.optimizer_trace; + +drop table t0,t1,t10; + # # Clean up # diff --git a/mysql-test/main/selectivity_no_engine.result b/mysql-test/main/selectivity_no_engine.result index 3811b12a1be..fe8646ac5bf 100644 --- a/mysql-test/main/selectivity_no_engine.result +++ b/mysql-test/main/selectivity_no_engine.result @@ -36,12 +36,12 @@ test.t2 analyze status OK # The following two must have the same in 'Extra' column: explain extended select * from t2 where col1 IN (20, 180); id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.08 Using where Warnings: Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where `test`.`t2`.`col1` in (20,180) explain extended select * from t2 where col1 IN (180, 20); id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.08 Using where Warnings: Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where `test`.`t2`.`col1` in (180,20) drop table t1, t2; @@ -102,7 +102,7 @@ test.t1 analyze status Engine-independent statistics collected test.t1 analyze status OK explain extended select * from t1 where col1 in (1,2,3); id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.37 Using where +1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 2.82 Using where Warnings: Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` in (1,2,3) # Must not cause fp division by zero, or produce nonsense numbers: diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index be011adb60c..5aca25d7a1b 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -3932,7 +3932,8 @@ double Histogram::point_selectivity(double pos, double avg_sel) /* So: - each bucket has the same #rows - - values are unformly distributed across the [min_value,max_value] domain. + - We assume that values are unformly distributed across the + [min_value,max_value] domain. If a bucket has value range that's N times bigger then average, than each value will have to have N times fewer rows than average. @@ -3940,11 +3941,13 @@ double Histogram::point_selectivity(double pos, double avg_sel) sel= avg_sel * avg_bucket_width / current_bucket_width; /* - (Q: if we just follow this proportion we may end up in a situation - where number of different values we expect to find in this bucket - exceeds the number of rows that this histogram has in a bucket. Are - we ok with this or we would want to have certain caps?) + Note that this adjustment is just a (brave?) heuristic. What we know for + certain is that the searched value fits into one histogram bucket. Do not + return an estimate larger than that. */ + double bucket_sel= 1.0/(get_width() + 1); + if (sel >= bucket_sel) + sel= bucket_sel; } return sel; } |