diff options
author | Sergey Petrunya <psergey@askmonty.org> | 2014-03-27 13:08:00 +0400 |
---|---|---|
committer | Sergey Petrunya <psergey@askmonty.org> | 2014-03-27 13:08:00 +0400 |
commit | 79a8a6130b0c43e98a64a1fde8f277e0df06da5d (patch) | |
tree | 2f1f0f604942584382be90140695865f890d88c9 | |
parent | 0d67aafaa2c383b4d2d76f8621109f8adbfb2532 (diff) | |
download | mariadb-git-79a8a6130b0c43e98a64a1fde8f277e0df06da5d.tar.gz |
Code cleanup:
- Move [some] engine-agnostic tests from t/selectivity.test to t/selectivity_no_engine.test
- Move Histogram::point_selectivity to sql_statistics.cc
-rw-r--r-- | mysql-test/r/selectivity.result | 98 | ||||
-rw-r--r-- | mysql-test/r/selectivity_innodb.result | 98 | ||||
-rw-r--r-- | mysql-test/r/selectivity_no_engine.result | 123 | ||||
-rw-r--r-- | mysql-test/t/selectivity.test | 62 | ||||
-rw-r--r-- | mysql-test/t/selectivity_no_engine.test | 96 | ||||
-rw-r--r-- | sql/sql_statistics.cc | 117 | ||||
-rw-r--r-- | sql/sql_statistics.h | 112 |
7 files changed, 337 insertions, 369 deletions
diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result index becfc02d6c4..4a96a2e4160 100644 --- a/mysql-test/r/selectivity.result +++ b/mysql-test/r/selectivity.result @@ -1289,104 +1289,6 @@ a b c d a b 221 56120 56120 28296 28296 3 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; drop table t1,t2; -# -# MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates -# -create table t1(a int); -insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t2 (col1 int); -# one value in 1..100 range -insert into t2 select A.a + B.a*10 from t1 A, t1 B; -# ten values in 100...200 range -insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C; -set histogram_type='SINGLE_PREC_HB'; -set histogram_size=100; -set optimizer_use_condition_selectivity=4; -analyze table t2 persistent for all; -Table Op Msg_type Msg_text -test.t2 analyze status OK -# The following two must have the same in 'Extra' column: -explain extended select * from t2 where col1 IN (20, 180); -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where -Warnings: -Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180)) -explain extended select * from t2 where col1 IN (180, 20); -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where -Warnings: -Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20)) -drop table t1, t2; -# -# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong -# -create table t0(a int); -insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t1(a int); -insert into t1 select A.a from t0 A, t0 B, t0 C; -set histogram_size=20; -set histogram_type='single_prec_hb'; -analyze table t1 persistent for all; -Table Op Msg_type Msg_text -test.t1 analyze status OK -set use_stat_tables='preferably'; -set optimizer_use_condition_selectivity=4; -# Should select about 10%: -explain extended select * from t1 where a=2; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where -Warnings: -Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2) -# Should select about 10%: -explain extended select * from t1 where a=1; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where -Warnings: -Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1) -# Must not have filtered=100%: -explain extended select * from t1 where a=0; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where -Warnings: -Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0) -# Again, must not have filtered=100%: -explain extended select * from t1 where a=-1; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where -Warnings: -Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1))) -drop table t0, t1; -# -# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range -# -create table t1 (col1 int); -set @a=-1; -create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100; -insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100; -select min(col1), max(col1), count(*) from t1; -min(col1) max(col1) count(*) -0 99 10000 -set histogram_size=100; -analyze table t1 persistent for all; -Table Op Msg_type Msg_text -test.t1 analyze status OK -explain extended select * from t1 where col1 in (1,2,3); -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.37 Using where -Warnings: -Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3)) -# Must not cause fp division by zero, or produce nonsense numbers: -explain extended select * from t1 where col1 in (-1,-2,-3); -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 5.94 Using where -Warnings: -Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3)))) -explain extended select * from t1 where col1<=-1; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 1.00 Using where -Warnings: -Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1))) -drop table t1, t2; set histogram_type=@save_histogram_type; set histogram_size=@save_histogram_size; set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result index 1761aa491d2..1abd4e4d392 100644 --- a/mysql-test/r/selectivity_innodb.result +++ b/mysql-test/r/selectivity_innodb.result @@ -1299,104 +1299,6 @@ a b c d a b 221 56120 56120 28296 28296 3 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; drop table t1,t2; -# -# MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates -# -create table t1(a int); -insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t2 (col1 int); -# one value in 1..100 range -insert into t2 select A.a + B.a*10 from t1 A, t1 B; -# ten values in 100...200 range -insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C; -set histogram_type='SINGLE_PREC_HB'; -set histogram_size=100; -set optimizer_use_condition_selectivity=4; -analyze table t2 persistent for all; -Table Op Msg_type Msg_text -test.t2 analyze status OK -# The following two must have the same in 'Extra' column: -explain extended select * from t2 where col1 IN (20, 180); -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where -Warnings: -Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180)) -explain extended select * from t2 where col1 IN (180, 20); -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where -Warnings: -Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20)) -drop table t1, t2; -# -# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong -# -create table t0(a int); -insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t1(a int); -insert into t1 select A.a from t0 A, t0 B, t0 C; -set histogram_size=20; -set histogram_type='single_prec_hb'; -analyze table t1 persistent for all; -Table Op Msg_type Msg_text -test.t1 analyze status OK -set use_stat_tables='preferably'; -set optimizer_use_condition_selectivity=4; -# Should select about 10%: -explain extended select * from t1 where a=2; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where -Warnings: -Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2) -# Should select about 10%: -explain extended select * from t1 where a=1; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where -Warnings: -Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1) -# Must not have filtered=100%: -explain extended select * from t1 where a=0; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where -Warnings: -Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0) -# Again, must not have filtered=100%: -explain extended select * from t1 where a=-1; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where -Warnings: -Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1))) -drop table t0, t1; -# -# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range -# -create table t1 (col1 int); -set @a=-1; -create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100; -insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100; -select min(col1), max(col1), count(*) from t1; -min(col1) max(col1) count(*) -0 99 10000 -set histogram_size=100; -analyze table t1 persistent for all; -Table Op Msg_type Msg_text -test.t1 analyze status OK -explain extended select * from t1 where col1 in (1,2,3); -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.37 Using where -Warnings: -Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3)) -# Must not cause fp division by zero, or produce nonsense numbers: -explain extended select * from t1 where col1 in (-1,-2,-3); -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 5.94 Using where -Warnings: -Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3)))) -explain extended select * from t1 where col1<=-1; -id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 1.00 Using where -Warnings: -Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1))) -drop table t1, t2; set histogram_type=@save_histogram_type; set histogram_size=@save_histogram_size; set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; diff --git a/mysql-test/r/selectivity_no_engine.result b/mysql-test/r/selectivity_no_engine.result new file mode 100644 index 00000000000..107d4b76903 --- /dev/null +++ b/mysql-test/r/selectivity_no_engine.result @@ -0,0 +1,123 @@ +# +# Engine-agnostic tests for statistics-based selectivity calculations. +# - selectivity tests that depend on the engine should go into +# t/selectivity.test. That test is run with myisam/innodb/xtradb. +# - this file is for tests that don't depend on the engine. +# +drop table if exists t0,t1,t2,t3; +select @@global.use_stat_tables; +@@global.use_stat_tables +COMPLEMENTARY +select @@session.use_stat_tables; +@@session.use_stat_tables +COMPLEMENTARY +set @save_use_stat_tables=@@use_stat_tables; +set use_stat_tables='preferably'; +set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity; +set @save_histogram_size=@@histogram_size; +set @save_histogram_type=@@histogram_type; +# +# MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates +# +create table t1(a int); +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t2 (col1 int); +# one value in 1..100 range +insert into t2 select A.a + B.a*10 from t1 A, t1 B; +# ten values in 100...200 range +insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C; +set histogram_type='SINGLE_PREC_HB'; +set histogram_size=100; +set optimizer_use_condition_selectivity=4; +analyze table t2 persistent for all; +Table Op Msg_type Msg_text +test.t2 analyze status OK +# The following two must have the same in 'Extra' column: +explain extended select * from t2 where col1 IN (20, 180); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where +Warnings: +Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180)) +explain extended select * from t2 where col1 IN (180, 20); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where +Warnings: +Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20)) +drop table t1, t2; +# +# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong +# +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1(a int); +insert into t1 select A.a from t0 A, t0 B, t0 C; +set histogram_size=20; +set histogram_type='single_prec_hb'; +analyze table t1 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status OK +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=4; +# Should select about 10%: +explain extended select * from t1 where a=2; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2) +# Should select about 10%: +explain extended select * from t1 where a=1; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1) +# Must not have filtered=100%: +explain extended select * from t1 where a=0; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0) +# Again, must not have filtered=100%: +explain extended select * from t1 where a=-1; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 9.52 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1))) +drop table t0, t1; +# +# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range +# +create table t1 (col1 int); +set @a=-1; +create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100; +insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100; +select min(col1), max(col1), count(*) from t1; +min(col1) max(col1) count(*) +0 99 10000 +set histogram_size=100; +analyze table t1 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status OK +explain extended select * from t1 where col1 in (1,2,3); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.37 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3)) +# Must not cause fp division by zero, or produce nonsense numbers: +explain extended select * from t1 where col1 in (-1,-2,-3); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 5.94 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3)))) +explain extended select * from t1 where col1<=-1; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 1.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1))) +drop table t1, t2; +# +# End of the test file +# +set use_stat_tables= @save_use_stat_tables; +set histogram_type=@save_histogram_type; +set histogram_size=@save_histogram_size; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test index 6403c770287..8cb2620550e 100644 --- a/mysql-test/t/selectivity.test +++ b/mysql-test/t/selectivity.test @@ -862,68 +862,6 @@ set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivit drop table t1,t2; ---echo # ---echo # MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates ---echo # -create table t1(a int); -insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); - -create table t2 (col1 int); ---echo # one value in 1..100 range -insert into t2 select A.a + B.a*10 from t1 A, t1 B; ---echo # ten values in 100...200 range -insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C; - -set histogram_type='SINGLE_PREC_HB'; -set histogram_size=100; -set optimizer_use_condition_selectivity=4; -analyze table t2 persistent for all; - ---echo # The following two must have the same in 'Extra' column: -explain extended select * from t2 where col1 IN (20, 180); -explain extended select * from t2 where col1 IN (180, 20); - -drop table t1, t2; - ---echo # ---echo # MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong ---echo # -create table t0(a int); -insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); -create table t1(a int); -insert into t1 select A.a from t0 A, t0 B, t0 C; -set histogram_size=20; -set histogram_type='single_prec_hb'; -analyze table t1 persistent for all; -set use_stat_tables='preferably'; -set optimizer_use_condition_selectivity=4; ---echo # Should select about 10%: -explain extended select * from t1 where a=2; ---echo # Should select about 10%: -explain extended select * from t1 where a=1; ---echo # Must not have filtered=100%: -explain extended select * from t1 where a=0; ---echo # Again, must not have filtered=100%: -explain extended select * from t1 where a=-1; - -drop table t0, t1; - ---echo # ---echo # MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range ---echo # -create table t1 (col1 int); -set @a=-1; -create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100; -insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100; -select min(col1), max(col1), count(*) from t1; -set histogram_size=100; -analyze table t1 persistent for all; -explain extended select * from t1 where col1 in (1,2,3); ---echo # Must not cause fp division by zero, or produce nonsense numbers: -explain extended select * from t1 where col1 in (-1,-2,-3); -explain extended select * from t1 where col1<=-1; -drop table t1, t2; - set histogram_type=@save_histogram_type; set histogram_size=@save_histogram_size; set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; diff --git a/mysql-test/t/selectivity_no_engine.test b/mysql-test/t/selectivity_no_engine.test new file mode 100644 index 00000000000..e88d822ce46 --- /dev/null +++ b/mysql-test/t/selectivity_no_engine.test @@ -0,0 +1,96 @@ +--source include/have_stat_tables.inc + +--echo # +--echo # Engine-agnostic tests for statistics-based selectivity calculations. +--echo # - selectivity tests that depend on the engine should go into +--echo # t/selectivity.test. That test is run with myisam/innodb/xtradb. +--echo # - this file is for tests that don't depend on the engine. +--echo # + +--disable_warnings +drop table if exists t0,t1,t2,t3; +--enable_warnings + +select @@global.use_stat_tables; +select @@session.use_stat_tables; + +set @save_use_stat_tables=@@use_stat_tables; + +set use_stat_tables='preferably'; + +set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity; +set @save_histogram_size=@@histogram_size; +set @save_histogram_type=@@histogram_type; + +--echo # +--echo # MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates +--echo # +create table t1(a int); +insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table t2 (col1 int); +--echo # one value in 1..100 range +insert into t2 select A.a + B.a*10 from t1 A, t1 B; +--echo # ten values in 100...200 range +insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C; + +set histogram_type='SINGLE_PREC_HB'; +set histogram_size=100; +set optimizer_use_condition_selectivity=4; +analyze table t2 persistent for all; + +--echo # The following two must have the same in 'Extra' column: +explain extended select * from t2 where col1 IN (20, 180); +explain extended select * from t2 where col1 IN (180, 20); + +drop table t1, t2; + +--echo # +--echo # MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong +--echo # +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1(a int); +insert into t1 select A.a from t0 A, t0 B, t0 C; +set histogram_size=20; +set histogram_type='single_prec_hb'; +analyze table t1 persistent for all; +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=4; +--echo # Should select about 10%: +explain extended select * from t1 where a=2; +--echo # Should select about 10%: +explain extended select * from t1 where a=1; +--echo # Must not have filtered=100%: +explain extended select * from t1 where a=0; +--echo # Again, must not have filtered=100%: +explain extended select * from t1 where a=-1; + +drop table t0, t1; + +--echo # +--echo # MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range +--echo # +create table t1 (col1 int); +set @a=-1; +create table t2 (a int) select (@a:=@a+1) as a from information_schema.session_variables A limit 100; +insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100; +select min(col1), max(col1), count(*) from t1; +set histogram_size=100; +analyze table t1 persistent for all; +explain extended select * from t1 where col1 in (1,2,3); +--echo # Must not cause fp division by zero, or produce nonsense numbers: +explain extended select * from t1 where col1 in (-1,-2,-3); +explain extended select * from t1 where col1<=-1; +drop table t1, t2; + + +--echo # +--echo # End of the test file +--echo # + +set use_stat_tables= @save_use_stat_tables; +set histogram_type=@save_histogram_type; +set histogram_size=@save_histogram_size; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; + diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index fd521f7a2ec..67e7a9c304b 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -3565,3 +3565,120 @@ double get_column_range_cardinality(Field *field, } return res; } + + + +/* + Estimate selectivity of "col=const" using a histogram + + @param pos Position of the "const" between column's min_value and + max_value. This is a number in [0..1] range. + @param avg_sel Average selectivity of condition "col=const" in this table. + It is calcuated as (#non_null_values / #distinct_values). + + @return + Expected condition selectivity (a number between 0 and 1) + + @notes + [re_zero_length_buckets] If a bucket with zero value-length is in the + middle of the histogram, we will not have min==max. Example: suppose, + pos_value=0x12, and the histogram is: + + #n #n+1 #n+2 + ... 0x10 0x12 0x12 0x14 ... + | + +------------- bucket with zero value-length + + Here, we will get min=#n+1, max=#n+2, and use the multi-bucket formula. + + The problem happens at the histogram ends. if pos_value=0, and the + histogram is: + + 0x00 0x10 ... + + then min=0, max=0. This means pos_value is contained within bucket #0, + but on the other hand, histogram data says that the bucket has only one + value. +*/ + +double Histogram::point_selectivity(double pos, double avg_sel) +{ + double sel; + /* Find the bucket that contains the value 'pos'. */ + uint min= find_bucket(pos, TRUE); + uint pos_value= (uint) (pos * prec_factor()); + + /* Find how many buckets this value occupies */ + uint max= min; + while (max + 1 < get_width() && get_value(max + 1) == pos_value) + max++; + + /* + A special case: we're looking at a single bucket, and that bucket has + zero value-length. Use the multi-bucket formula (attempt to use + single-bucket formula will cause divison by zero). + + For more details see [re_zero_length_buckets] above. + */ + if (max == min && get_value(max) == ((max==0)? 0 : get_value(max-1))) + max++; + + if (max > min) + { + /* + The value occupies multiple buckets. Use start_bucket ... end_bucket as + selectivity. + */ + double bucket_sel= 1.0/(get_width() + 1); + sel= bucket_sel * (max - min + 1); + } + else + { + /* + The value 'pos' fits within one single histogram bucket. + + Histogram buckets have the same numbers of rows, but they cover + different ranges of values. + + We assume that values are uniformly distributed across the [0..1] value + range. + */ + + /* + If all buckets covered value ranges of the same size, the width of + value range would be: + */ + double avg_bucket_width= 1.0 / (get_width() + 1); + + /* + Let's see what is the width of value range that our bucket is covering. + (min==max currently. they are kept in the formula just in case we + will want to extend it to handle multi-bucket case) + */ + double inv_prec_factor= (double) 1.0 / prec_factor(); + double current_bucket_width= + (max + 1 == get_width() ? 1.0 : (get_value(max) * inv_prec_factor)) - + (min == 0 ? 0.0 : (get_value(min-1) * inv_prec_factor)); + + DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */ + + /* + So: + - each bucket has the same #rows + - values are unformly distributed across the [min_value,max_value] domain. + + If a bucket has value range that's N times bigger then average, than + each value will have to have N times fewer rows than average. + */ + sel= avg_sel * avg_bucket_width / current_bucket_width; + + /* + (Q: if we just follow this proportion we may end up in a situation + where number of different values we expect to find in this bucket + exceeds the number of rows that this histogram has in a bucket. Are + we ok with this or we would want to have certain caps?) + */ + } + return sel; +} + diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index d0db0a3bf33..331e3559203 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -241,120 +241,10 @@ public: return sel; } - /* Estimate selectivity of "col=const" using a histogram - - @param pos Position of the "const" between column's min_value and - max_value. This is a number in [0..1] range. - @param avg_sel Average selectivity of condition "col=const" in this table. - It is calcuated as (#non_null_values / #distinct_values). - - @return - Expected condition selectivity (a number between 0 and 1) - - @notes - [re_zero_length_buckets] If a bucket with zero value-length is in the - middle of the histogram, we will not have min==max. Example: suppose, - pos_value=0x12, and the histogram is: - - #n #n+1 #n+2 - ... 0x10 0x12 0x12 0x14 ... - | - +------------- bucket with zero value-length - - Here, we will get min=#n+1, max=#n+2, and use the multi-bucket formula. - - The problem happens at the histogram ends. if pos_value=0, and the - histogram is: - - 0x00 0x10 ... - - then min=0, max=0. This means pos_value is contained within bucket #0, - but on the other hand, histogram data says that the bucket has only one - value. */ - - double point_selectivity(double pos, double avg_sel) - { - double sel; - /* Find the bucket that contains the value 'pos'. */ - uint min= find_bucket(pos, TRUE); - uint pos_value= (uint) (pos * prec_factor()); - - /* Find how many buckets this value occupies */ - uint max= min; - while (max + 1 < get_width() && get_value(max + 1) == pos_value) - max++; - - /* - A special case: we're looking at a single bucket, and that bucket has - zero value-length. Use the multi-bucket formula (attempt to use - single-bucket formula will cause divison by zero). - - For more details see [re_zero_length_buckets] above. - */ - if (max == min && get_value(max) == ((max==0)? 0 : get_value(max-1))) - max++; - - if (max > min) - { - /* - The value occupies multiple buckets. Use start_bucket ... end_bucket as - selectivity. - */ - double bucket_sel= 1.0/(get_width() + 1); - sel= bucket_sel * (max - min + 1); - } - else - { - /* - The value 'pos' fits within one single histogram bucket. - - Histogram buckets have the same numbers of rows, but they cover - different ranges of values. - - We assume that values are uniformly distributed across the [0..1] value - range. - */ - - /* - If all buckets covered value ranges of the same size, the width of - value range would be: - */ - double avg_bucket_width= 1.0 / (get_width() + 1); - - /* - Let's see what is the width of value range that our bucket is covering. - (min==max currently. they are kept in the formula just in case we - will want to extend it to handle multi-bucket case) - */ - double inv_prec_factor= (double) 1.0 / prec_factor(); - double current_bucket_width= - (max + 1 == get_width() ? 1.0 : (get_value(max) * inv_prec_factor)) - - (min == 0 ? 0.0 : (get_value(min-1) * inv_prec_factor)); - - DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */ - - /* - So: - - each bucket has the same #rows - - values are unformly distributed across the [min_value,max_value] domain. - - If a bucket has value range that's N times bigger then average, than - each value will have to have N times fewer rows than average. - */ - sel= avg_sel * avg_bucket_width / current_bucket_width; - - /* - (Q: if we just follow this proportion we may end up in a situation - where number of different values we expect to find in this bucket - exceeds the number of rows that this histogram has in a bucket. Are - we ok with this or we would want to have certain caps?) - */ - } - return sel; - } + double point_selectivity(double pos, double avg_sel); }; |