Code cleanup:

- Move [some] engine-agnostic tests from t/selectivity.test to t/selectivity_no_engine.test - Move Histogram::point_selectivity to sql_statistics.cc
author: Sergey Petrunya <psergey@askmonty.org> 2014-03-27 13:08:00 +0400
committer: Sergey Petrunya <psergey@askmonty.org> 2014-03-27 13:08:00 +0400
commit: 79a8a6130b0c43e98a64a1fde8f277e0df06da5d (patch)
tree: 2f1f0f604942584382be90140695865f890d88c9
parent: 0d67aafaa2c383b4d2d76f8621109f8adbfb2532 (diff)
download: mariadb-git-79a8a6130b0c43e98a64a1fde8f277e0df06da5d.tar.gz
7 files changed, 337 insertions, 369 deletions
diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result
index becfc02d6c4..4a96a2e4160 100644
--- a/mysql-test/r/selectivity.result
+++ b/mysql-test/r/selectivity.result
@@ -1289,104 +1289,6 @@ a	b	c	d	a	b
 221	56120	56120	28296	28296	3
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
 drop table t1,t2;
-#
-# MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates
-#
-create table t1(a int);
-insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
-create table t2 (col1 int);
-# one value in 1..100 range
-insert into t2 select A.a + B.a*10 from t1 A, t1 B;
-# ten values in 100...200 range
-insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C;
-set histogram_type='SINGLE_PREC_HB';
-set histogram_size=100;
-set optimizer_use_condition_selectivity=4;
-analyze table t2 persistent for all;
-Table	Op	Msg_type	Msg_text
-test.t2	analyze	status	OK
-# The following two must have the same in 'Extra' column:
-explain extended select * from t2 where col1 IN (20, 180);
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
-Warnings:
-Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180))
-explain extended select * from t2 where col1 IN (180, 20);
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
-Warnings:
-Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20))
-drop table t1, t2;
-#
-# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
-#
-create table t0(a int);
-insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
-create table t1(a int);
-insert into t1 select A.a from t0 A, t0 B, t0 C;
-set histogram_size=20;
-set histogram_type='single_prec_hb';
-analyze table t1 persistent for all;
-Table	Op	Msg_type	Msg_text
-test.t1	analyze	status	OK
-set use_stat_tables='preferably';
-set optimizer_use_condition_selectivity=4;
-# Should select about 10%:
-explain extended select * from t1 where a=2;
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2)
-# Should select about 10%:
-explain extended select * from t1 where a=1;
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1)
-# Must not have filtered=100%:
-explain extended select * from t1 where a=0;
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
-# Again, must not have filtered=100%:
-explain extended select * from t1 where a=-1;
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
-drop table t0, t1;
-#
-# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
-#
-create table t1 (col1 int);
-set @a=-1;
-create table t2 (a int)  select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
-insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
-select min(col1), max(col1), count(*) from t1;
-min(col1)	max(col1)	count(*)
-0	99	10000
-set histogram_size=100;
-analyze table t1 persistent for all;
-Table	Op	Msg_type	Msg_text
-test.t1	analyze	status	OK
-explain extended select * from t1 where col1 in (1,2,3);
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	3.37	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3))
-# Must not cause fp division by zero, or produce nonsense numbers:
-explain extended select * from t1 where col1 in (-1,-2,-3);
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	5.94	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
-explain extended select * from t1 where col1<=-1;
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	1.00	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
-drop table t1, t2;
 set histogram_type=@save_histogram_type;
 set histogram_size=@save_histogram_size;
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result
index 1761aa491d2..1abd4e4d392 100644
--- a/mysql-test/r/selectivity_innodb.result
+++ b/mysql-test/r/selectivity_innodb.result
@@ -1299,104 +1299,6 @@ a	b	c	d	a	b
 221	56120	56120	28296	28296	3
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
 drop table t1,t2;
-#
-# MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates
-#
-create table t1(a int);
-insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
-create table t2 (col1 int);
-# one value in 1..100 range
-insert into t2 select A.a + B.a*10 from t1 A, t1 B;
-# ten values in 100...200 range
-insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C;
-set histogram_type='SINGLE_PREC_HB';
-set histogram_size=100;
-set optimizer_use_condition_selectivity=4;
-analyze table t2 persistent for all;
-Table	Op	Msg_type	Msg_text
-test.t2	analyze	status	OK
-# The following two must have the same in 'Extra' column:
-explain extended select * from t2 where col1 IN (20, 180);
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
-Warnings:
-Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180))
-explain extended select * from t2 where col1 IN (180, 20);
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
-Warnings:
-Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20))
-drop table t1, t2;
-#
-# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
-#
-create table t0(a int);
-insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
-create table t1(a int);
-insert into t1 select A.a from t0 A, t0 B, t0 C;
-set histogram_size=20;
-set histogram_type='single_prec_hb';
-analyze table t1 persistent for all;
-Table	Op	Msg_type	Msg_text
-test.t1	analyze	status	OK
-set use_stat_tables='preferably';
-set optimizer_use_condition_selectivity=4;
-# Should select about 10%:
-explain extended select * from t1 where a=2;
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2)
-# Should select about 10%:
-explain extended select * from t1 where a=1;
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1)
-# Must not have filtered=100%:
-explain extended select * from t1 where a=0;
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
-# Again, must not have filtered=100%:
-explain extended select * from t1 where a=-1;
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
-drop table t0, t1;
-#
-# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
-#
-create table t1 (col1 int);
-set @a=-1;
-create table t2 (a int)  select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
-insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
-select min(col1), max(col1), count(*) from t1;
-min(col1)	max(col1)	count(*)
-0	99	10000
-set histogram_size=100;
-analyze table t1 persistent for all;
-Table	Op	Msg_type	Msg_text
-test.t1	analyze	status	OK
-explain extended select * from t1 where col1 in (1,2,3);
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	3.37	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3))
-# Must not cause fp division by zero, or produce nonsense numbers:
-explain extended select * from t1 where col1 in (-1,-2,-3);
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	5.94	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
-explain extended select * from t1 where col1<=-1;
-id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	1.00	Using where
-Warnings:
-Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
-drop table t1, t2;
 set histogram_type=@save_histogram_type;
 set histogram_size=@save_histogram_size;
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
diff --git a/mysql-test/r/selectivity_no_engine.result b/mysql-test/r/selectivity_no_engine.result
new file mode 100644
index 00000000000..107d4b76903
--- /dev/null
+++ b/mysql-test/r/selectivity_no_engine.result
@@ -0,0 +1,123 @@
+#
+#  Engine-agnostic tests for statistics-based selectivity calculations.
+#   - selectivity tests that depend on the engine should go into
+#     t/selectivity.test. That test is run with myisam/innodb/xtradb.
+#   - this file is for tests that don't depend on the engine. 
+#
+drop table if exists t0,t1,t2,t3;
+select @@global.use_stat_tables;
+@@global.use_stat_tables
+COMPLEMENTARY
+select @@session.use_stat_tables;
+@@session.use_stat_tables
+COMPLEMENTARY
+set @save_use_stat_tables=@@use_stat_tables;
+set use_stat_tables='preferably';
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+set @save_histogram_size=@@histogram_size;
+set @save_histogram_type=@@histogram_type;
+#
+# MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates
+#
+create table t1(a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (col1 int);
+# one value in 1..100 range
+insert into t2 select A.a + B.a*10 from t1 A, t1 B;
+# ten values in 100...200 range
+insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C;
+set histogram_type='SINGLE_PREC_HB';
+set histogram_size=100;
+set optimizer_use_condition_selectivity=4;
+analyze table t2 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t2	analyze	status	OK
+# The following two must have the same in 'Extra' column:
+explain extended select * from t2 where col1 IN (20, 180);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
+Warnings:
+Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (20,180))
+explain extended select * from t2 where col1 IN (180, 20);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
+Warnings:
+Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where (`test`.`t2`.`col1` in (180,20))
+drop table t1, t2;
+#
+# MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
+#
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a from t0 A, t0 B, t0 C;
+set histogram_size=20;
+set histogram_type='single_prec_hb';
+analyze table t1 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=4;
+# Should select about 10%:
+explain extended select * from t1 where a=2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 2)
+# Should select about 10%:
+explain extended select * from t1 where a=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 1)
+# Must not have filtered=100%:
+explain extended select * from t1 where a=0;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = 0)
+# Again, must not have filtered=100%:
+explain extended select * from t1 where a=-1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	9.52	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` = <cache>(-(1)))
+drop table t0, t1;
+#
+# MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
+#
+create table t1 (col1 int);
+set @a=-1;
+create table t2 (a int)  select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
+insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
+select min(col1), max(col1), count(*) from t1;
+min(col1)	max(col1)	count(*)
+0	99	10000
+set histogram_size=100;
+analyze table t1 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+explain extended select * from t1 where col1 in (1,2,3);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	3.37	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (1,2,3))
+# Must not cause fp division by zero, or produce nonsense numbers:
+explain extended select * from t1 where col1 in (-1,-2,-3);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	5.94	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` in (<cache>(-(1)),<cache>(-(2)),<cache>(-(3))))
+explain extended select * from t1 where col1<=-1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	1.00	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where (`test`.`t1`.`col1` <= <cache>(-(1)))
+drop table t1, t2;
+# 
+# End of the test file
+# 
+set use_stat_tables= @save_use_stat_tables;
+set histogram_type=@save_histogram_type;
+set histogram_size=@save_histogram_size;
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test
index 6403c770287..8cb2620550e 100644
--- a/mysql-test/t/selectivity.test
+++ b/mysql-test/t/selectivity.test
@@ -862,68 +862,6 @@ set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivit
 
 drop table t1,t2;
 
---echo #
---echo # MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates
---echo #
-create table t1(a int);
-insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
-
-create table t2 (col1 int);
---echo # one value in 1..100 range
-insert into t2 select A.a + B.a*10 from t1 A, t1 B;
---echo # ten values in 100...200 range
-insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C;
-
-set histogram_type='SINGLE_PREC_HB';
-set histogram_size=100;
-set optimizer_use_condition_selectivity=4;
-analyze table t2 persistent for all;
-
---echo # The following two must have the same in 'Extra' column:
-explain extended select * from t2 where col1 IN (20, 180);
-explain extended select * from t2 where col1 IN (180, 20);
-
-drop table t1, t2;
-
---echo #
---echo # MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
---echo #
-create table t0(a int);
-insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
-create table t1(a int);
-insert into t1 select A.a from t0 A, t0 B, t0 C;
-set histogram_size=20;
-set histogram_type='single_prec_hb';
-analyze table t1 persistent for all;
-set use_stat_tables='preferably';
-set optimizer_use_condition_selectivity=4;
---echo # Should select about 10%:
-explain extended select * from t1 where a=2;
---echo # Should select about 10%:
-explain extended select * from t1 where a=1;
---echo # Must not have filtered=100%:
-explain extended select * from t1 where a=0;
---echo # Again, must not have filtered=100%:
-explain extended select * from t1 where a=-1;
-
-drop table t0, t1;
-
---echo #
---echo # MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
---echo #
-create table t1 (col1 int);
-set @a=-1;
-create table t2 (a int)  select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
-insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
-select min(col1), max(col1), count(*) from t1;
-set histogram_size=100;
-analyze table t1 persistent for all;
-explain extended select * from t1 where col1 in (1,2,3);
---echo # Must not cause fp division by zero, or produce nonsense numbers:
-explain extended select * from t1 where col1 in (-1,-2,-3);
-explain extended select * from t1 where col1<=-1;
-drop table t1, t2;
-
 set histogram_type=@save_histogram_type;
 set histogram_size=@save_histogram_size;
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
diff --git a/mysql-test/t/selectivity_no_engine.test b/mysql-test/t/selectivity_no_engine.test
new file mode 100644
index 00000000000..e88d822ce46
--- /dev/null
+++ b/mysql-test/t/selectivity_no_engine.test
@@ -0,0 +1,96 @@
+--source include/have_stat_tables.inc
+
+--echo #
+--echo #  Engine-agnostic tests for statistics-based selectivity calculations.
+--echo #   - selectivity tests that depend on the engine should go into
+--echo #     t/selectivity.test. That test is run with myisam/innodb/xtradb.
+--echo #   - this file is for tests that don't depend on the engine. 
+--echo #
+
+--disable_warnings
+drop table if exists t0,t1,t2,t3;
+--enable_warnings
+
+select @@global.use_stat_tables;
+select @@session.use_stat_tables;
+
+set @save_use_stat_tables=@@use_stat_tables;
+
+set use_stat_tables='preferably';
+
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+set @save_histogram_size=@@histogram_size;
+set @save_histogram_type=@@histogram_type;
+
+--echo #
+--echo # MDEV-5917: EITS: different order of predicates in IN (...) causes different estimates
+--echo #
+create table t1(a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t2 (col1 int);
+--echo # one value in 1..100 range
+insert into t2 select A.a + B.a*10 from t1 A, t1 B;
+--echo # ten values in 100...200 range
+insert into t2 select 100 + A.a + B.a*10 from t1 A, t1 B, t1 C;
+
+set histogram_type='SINGLE_PREC_HB';
+set histogram_size=100;
+set optimizer_use_condition_selectivity=4;
+analyze table t2 persistent for all;
+
+--echo # The following two must have the same in 'Extra' column:
+explain extended select * from t2 where col1 IN (20, 180);
+explain extended select * from t2 where col1 IN (180, 20);
+
+drop table t1, t2;
+
+--echo #
+--echo # MDEV-5926: EITS: Histogram estimates for column=least_possible_value are wrong
+--echo #
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a from t0 A, t0 B, t0 C;
+set histogram_size=20;
+set histogram_type='single_prec_hb';
+analyze table t1 persistent for all;
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=4;
+--echo # Should select about 10%:
+explain extended select * from t1 where a=2;
+--echo # Should select about 10%:
+explain extended select * from t1 where a=1;
+--echo # Must not have filtered=100%:
+explain extended select * from t1 where a=0;
+--echo # Again, must not have filtered=100%:
+explain extended select * from t1 where a=-1;
+
+drop table t0, t1;
+
+--echo #
+--echo # MDEV-4362: Selectivity estimates for IN (...) do not depend on whether the values are in range
+--echo #
+create table t1 (col1 int);
+set @a=-1;
+create table t2 (a int)  select (@a:=@a+1) as a from information_schema.session_variables A limit 100;
+insert into t1 select A.a from t2 A, t2 B where A.a < 100 and B.a < 100;
+select min(col1), max(col1), count(*) from t1;
+set histogram_size=100;
+analyze table t1 persistent for all;
+explain extended select * from t1 where col1 in (1,2,3);
+--echo # Must not cause fp division by zero, or produce nonsense numbers:
+explain extended select * from t1 where col1 in (-1,-2,-3);
+explain extended select * from t1 where col1<=-1;
+drop table t1, t2;
+
+
+--echo # 
+--echo # End of the test file
+--echo # 
+
+set use_stat_tables= @save_use_stat_tables;
+set histogram_type=@save_histogram_type;
+set histogram_size=@save_histogram_size;
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index fd521f7a2ec..67e7a9c304b 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -3565,3 +3565,120 @@ double get_column_range_cardinality(Field *field,
   }
   return res;
 }
+
+
+
+/*
+  Estimate selectivity of "col=const" using a histogram
+  
+  @param pos      Position of the "const" between column's min_value and 
+                  max_value.  This is a number in [0..1] range.
+  @param avg_sel  Average selectivity of condition "col=const" in this table.
+                  It is calcuated as (#non_null_values / #distinct_values).
+  
+  @return
+     Expected condition selectivity (a number between 0 and 1)
+
+  @notes 
+     [re_zero_length_buckets] If a bucket with zero value-length is in the
+     middle of the histogram, we will not have min==max. Example: suppose, 
+     pos_value=0x12, and the histogram is:
+
+           #n  #n+1 #n+2                 
+      ... 0x10 0x12 0x12 0x14 ...
+                      |
+                      +------------- bucket with zero value-length
+    
+      Here, we will get min=#n+1, max=#n+2, and use the multi-bucket formula.
+     
+      The problem happens at the histogram ends. if pos_value=0, and the
+      histogram is:
+
+      0x00 0x10 ...
+
+      then min=0, max=0. This means pos_value is contained within bucket #0,
+      but on the other hand, histogram data says that the bucket has only one
+      value.
+*/
+
+double Histogram::point_selectivity(double pos, double avg_sel)
+{
+  double sel;
+  /* Find the bucket that contains the value 'pos'. */
+  uint min= find_bucket(pos, TRUE);
+  uint pos_value= (uint) (pos * prec_factor());
+
+  /* Find how many buckets this value occupies */
+  uint max= min;
+  while (max + 1 < get_width() && get_value(max + 1) == pos_value)
+    max++;
+  
+  /*
+    A special case: we're looking at a single bucket, and that bucket has
+    zero value-length. Use the multi-bucket formula (attempt to use
+    single-bucket formula will cause divison by zero).
+
+    For more details see [re_zero_length_buckets] above.
+  */
+  if (max == min && get_value(max) == ((max==0)? 0 : get_value(max-1)))
+    max++;
+
+  if (max > min)
+  {
+    /*
+      The value occupies multiple buckets. Use start_bucket ... end_bucket as
+      selectivity.
+    */
+    double bucket_sel= 1.0/(get_width() + 1);  
+    sel= bucket_sel * (max - min + 1);
+  }
+  else
+  {
+    /* 
+      The value 'pos' fits within one single histogram bucket.
+
+      Histogram buckets have the same numbers of rows, but they cover
+      different ranges of values.
+
+      We assume that values are uniformly distributed across the [0..1] value
+      range.
+    */
+
+    /* 
+      If all buckets covered value ranges of the same size, the width of
+      value range would be:
+    */
+    double avg_bucket_width= 1.0 / (get_width() + 1);
+    
+    /*
+      Let's see what is the width of value range that our bucket is covering.
+        (min==max currently. they are kept in the formula just in case we 
+         will want to extend it to handle multi-bucket case)
+    */
+    double inv_prec_factor= (double) 1.0 / prec_factor(); 
+    double current_bucket_width= 
+        (max + 1 == get_width() ?  1.0 : (get_value(max) * inv_prec_factor)) -
+        (min == 0 ?  0.0 : (get_value(min-1) * inv_prec_factor));
+
+    DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */
+
+    /*
+      So:
+      - each bucket has the same #rows 
+      - values are unformly distributed across the [min_value,max_value] domain.
+
+      If a bucket has value range that's N times bigger then average, than
+      each value will have to have N times fewer rows than average.
+    */
+    sel= avg_sel * avg_bucket_width / current_bucket_width;
+
+    /*
+      (Q: if we just follow this proportion we may end up in a situation
+      where number of different values we expect to find in this bucket
+      exceeds the number of rows that this histogram has in a bucket. Are 
+      we ok with this or we would want to have certain caps?)
+    */
+  }
+  return sel;
+}
+
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index d0db0a3bf33..331e3559203 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -241,120 +241,10 @@ public:
     return sel;
   } 
   
-  
   /*
     Estimate selectivity of "col=const" using a histogram
-    
-    @param pos      Position of the "const" between column's min_value and 
-                    max_value.  This is a number in [0..1] range.
-    @param avg_sel  Average selectivity of condition "col=const" in this table.
-                    It is calcuated as (#non_null_values / #distinct_values).
-    
-    @return
-       Expected condition selectivity (a number between 0 and 1)
-
-    @notes 
-       [re_zero_length_buckets] If a bucket with zero value-length is in the
-       middle of the histogram, we will not have min==max. Example: suppose, 
-       pos_value=0x12, and the histogram is:
-
-             #n  #n+1 #n+2                 
-        ... 0x10 0x12 0x12 0x14 ...
-                        |
-                        +------------- bucket with zero value-length
-      
-        Here, we will get min=#n+1, max=#n+2, and use the multi-bucket formula.
-       
-        The problem happens at the histogram ends. if pos_value=0, and the
-        histogram is:
-
-        0x00 0x10 ...
-
-        then min=0, max=0. This means pos_value is contained within bucket #0,
-        but on the other hand, histogram data says that the bucket has only one
-        value.
   */
-
-  double point_selectivity(double pos, double avg_sel)
-  {
-    double sel;
-    /* Find the bucket that contains the value 'pos'. */
-    uint min= find_bucket(pos, TRUE);
-    uint pos_value= (uint) (pos * prec_factor());
-
-    /* Find how many buckets this value occupies */
-    uint max= min;
-    while (max + 1 < get_width() && get_value(max + 1) == pos_value)
-      max++;
-    
-    /*
-      A special case: we're looking at a single bucket, and that bucket has
-      zero value-length. Use the multi-bucket formula (attempt to use
-      single-bucket formula will cause divison by zero).
-
-      For more details see [re_zero_length_buckets] above.
-    */
-    if (max == min && get_value(max) == ((max==0)? 0 : get_value(max-1)))
-      max++;
-
-    if (max > min)
-    {
-      /*
-        The value occupies multiple buckets. Use start_bucket ... end_bucket as
-        selectivity.
-      */
-      double bucket_sel= 1.0/(get_width() + 1);  
-      sel= bucket_sel * (max - min + 1);
-    }
-    else
-    {
-      /* 
-        The value 'pos' fits within one single histogram bucket.
-
-        Histogram buckets have the same numbers of rows, but they cover
-        different ranges of values.
-
-        We assume that values are uniformly distributed across the [0..1] value
-        range.
-      */
-
-      /* 
-        If all buckets covered value ranges of the same size, the width of
-        value range would be:
-      */
-      double avg_bucket_width= 1.0 / (get_width() + 1);
-      
-      /*
-        Let's see what is the width of value range that our bucket is covering.
-          (min==max currently. they are kept in the formula just in case we 
-           will want to extend it to handle multi-bucket case)
-      */
-      double inv_prec_factor= (double) 1.0 / prec_factor(); 
-      double current_bucket_width= 
-          (max + 1 == get_width() ?  1.0 : (get_value(max) * inv_prec_factor)) -
-          (min == 0 ?  0.0 : (get_value(min-1) * inv_prec_factor));
-
-      DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */
-
-      /*
-        So:
-        - each bucket has the same #rows 
-        - values are unformly distributed across the [min_value,max_value] domain.
-
-        If a bucket has value range that's N times bigger then average, than
-        each value will have to have N times fewer rows than average.
-      */
-      sel= avg_sel * avg_bucket_width / current_bucket_width;
-
-      /*
-        (Q: if we just follow this proportion we may end up in a situation
-        where number of different values we expect to find in this bucket
-        exceeds the number of rows that this histogram has in a bucket. Are 
-        we ok with this or we would want to have certain caps?)
-      */
-    }
-    return sel;
-  }
+  double point_selectivity(double pos, double avg_sel);
 };
author	Sergey Petrunya <psergey@askmonty.org>	2014-03-27 13:08:00 +0400
committer	Sergey Petrunya <psergey@askmonty.org>	2014-03-27 13:08:00 +0400
commit	79a8a6130b0c43e98a64a1fde8f277e0df06da5d (patch)
tree	2f1f0f604942584382be90140695865f890d88c9
parent	0d67aafaa2c383b4d2d76f8621109f8adbfb2532 (diff)
download	mariadb-git-79a8a6130b0c43e98a64a1fde8f277e0df06da5d.tar.gz