diff options
author | unknown <sanja@askmonty.org> | 2013-04-18 22:22:04 +0300 |
---|---|---|
committer | unknown <sanja@askmonty.org> | 2013-04-18 22:22:04 +0300 |
commit | 9441e536535af3e71aaa81801645ab42bd07e89f (patch) | |
tree | 4479a5d77e53380b7bd2d9b3d4db3c271ea6b40a | |
parent | 8c71e7a38358c79e1b5b9072fa7407cec3cfd273 (diff) | |
download | mariadb-git-9441e536535af3e71aaa81801645ab42bd07e89f.tar.gz |
MDEV-4345
Sampling of selectivity of LIKE predicate.
-rw-r--r-- | mysql-test/r/mysqld--help.result | 8 | ||||
-rw-r--r-- | mysql-test/r/selectivity.result | 83 | ||||
-rw-r--r-- | mysql-test/r/selectivity_innodb.result | 83 | ||||
-rw-r--r-- | mysql-test/suite/sys_vars/r/all_vars.result | 1 | ||||
-rw-r--r-- | mysql-test/t/selectivity.test | 59 | ||||
-rw-r--r-- | sql/item.h | 15 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 33 | ||||
-rw-r--r-- | sql/item_cmpfunc.h | 8 | ||||
-rw-r--r-- | sql/opt_range.cc | 69 | ||||
-rw-r--r-- | sql/opt_range.h | 7 | ||||
-rw-r--r-- | sql/sql_class.h | 1 | ||||
-rw-r--r-- | sql/sql_select.cc | 72 | ||||
-rw-r--r-- | sql/sql_select.h | 13 | ||||
-rw-r--r-- | sql/sys_vars.cc | 15 | ||||
-rw-r--r-- | sql/table.cc | 1 | ||||
-rw-r--r-- | sql/table.h | 3 |
16 files changed, 464 insertions, 7 deletions
diff --git a/mysql-test/r/mysqld--help.result b/mysql-test/r/mysqld--help.result index ff54af125e0..e1d9fe4e2fd 100644 --- a/mysql-test/r/mysqld--help.result +++ b/mysql-test/r/mysqld--help.result @@ -499,6 +499,9 @@ The following options may be given as the first argument: optimizer will switch to the original find_best search. NOTE: The value 63 and its associated behaviour is deprecated + --optimizer-selectivity-sampling-limit=# + Controls number of record samples to check condition + selectivity --optimizer-switch=name optimizer_switch=option=val[,option=val...], where option is one of {derived_merge, derived_with_keys, firstmatch, @@ -527,7 +530,9 @@ The following options may be given as the first argument: any index to calculate the cardinality of a partial join, 4 - use histograms to calculate selectivity of range conditions that are not backed by any index to calculate - the cardinality of a partial join. + the cardinality of a partial join.5 - additionally use + selectivity of certain non-range predicates calculated on + record samples --performance-schema Enable the performance schema. --performance-schema-events-waits-history-long-size=# @@ -1053,6 +1058,7 @@ old-passwords FALSE old-style-user-limits FALSE optimizer-prune-level 1 optimizer-search-depth 62 +optimizer-selectivity-sampling-limit 5000 optimizer-switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=off,table_elimination=on optimizer-use-condition-selectivity 1 performance-schema FALSE diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result index 79ff506d603..512ef4ffc1b 100644 --- a/mysql-test/r/selectivity.result +++ b/mysql-test/r/selectivity.result @@ -37,6 +37,89 @@ set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivit DROP DATABASE IF EXISTS dbt3_s001; CREATE DATABASE dbt3_s001; use dbt3_s001; +=== Q2 === +set optimizer_use_condition_selectivity=5; +explain extended +select +s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment +from +part, supplier, partsupp, nation, region +where +p_partkey = ps_partkey +and s_suppkey = ps_suppkey +and p_size = 9 +and p_type like '%TIN' + and s_nationkey = n_nationkey +and n_regionkey = r_regionkey +and r_name = 'ASIA' + and ps_supplycost = ( +select +min(ps_supplycost) +from +partsupp, supplier, nation, region +where +p_partkey = ps_partkey +and s_suppkey = ps_suppkey +and s_nationkey = n_nationkey +and n_regionkey = r_regionkey +and r_name = 'ASIA' + ) +order by +s_acctbal desc, n_name, s_name, p_partkey; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY part ALL PRIMARY NULL NULL NULL 200 0.31 Using where; Using temporary; Using filesort +1 PRIMARY region ALL PRIMARY NULL NULL NULL 5 20.00 Using where; Using join buffer (flat, BNL join) +1 PRIMARY partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 Using where +1 PRIMARY supplier ALL PRIMARY,i_s_nationkey NULL NULL NULL 10 80.00 Using where; Using join buffer (flat, BNL join) +1 PRIMARY nation eq_ref PRIMARY,i_n_regionkey PRIMARY 4 dbt3_s001.supplier.s_nationkey 1 100.00 Using where +2 DEPENDENT SUBQUERY region ALL PRIMARY NULL NULL NULL 5 20.00 Using where +2 DEPENDENT SUBQUERY partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 +2 DEPENDENT SUBQUERY supplier eq_ref PRIMARY,i_s_nationkey PRIMARY 4 dbt3_s001.partsupp.ps_suppkey 1 100.00 Using where +2 DEPENDENT SUBQUERY nation eq_ref PRIMARY,i_n_regionkey PRIMARY 4 dbt3_s001.supplier.s_nationkey 1 100.00 Using where +Warnings: +Note 1276 Field or reference 'dbt3_s001.part.p_partkey' of SELECT #2 was resolved in SELECT #1 +Note 1003 select `dbt3_s001`.`supplier`.`s_acctbal` AS `s_acctbal`,`dbt3_s001`.`supplier`.`s_name` AS `s_name`,`dbt3_s001`.`nation`.`n_name` AS `n_name`,`dbt3_s001`.`part`.`p_partkey` AS `p_partkey`,`dbt3_s001`.`part`.`p_mfgr` AS `p_mfgr`,`dbt3_s001`.`supplier`.`s_address` AS `s_address`,`dbt3_s001`.`supplier`.`s_phone` AS `s_phone`,`dbt3_s001`.`supplier`.`s_comment` AS `s_comment` from `dbt3_s001`.`part` join `dbt3_s001`.`supplier` join `dbt3_s001`.`partsupp` join `dbt3_s001`.`nation` join `dbt3_s001`.`region` where ((`dbt3_s001`.`partsupp`.`ps_partkey` = `dbt3_s001`.`part`.`p_partkey`) and (`dbt3_s001`.`supplier`.`s_suppkey` = `dbt3_s001`.`partsupp`.`ps_suppkey`) and (`dbt3_s001`.`part`.`p_size` = 9) and (`dbt3_s001`.`nation`.`n_nationkey` = `dbt3_s001`.`supplier`.`s_nationkey`) and (`dbt3_s001`.`nation`.`n_regionkey` = `dbt3_s001`.`region`.`r_regionkey`) and (`dbt3_s001`.`region`.`r_name` = 'ASIA') and (`dbt3_s001`.`part`.`p_type` like '%TIN') and (`dbt3_s001`.`partsupp`.`ps_supplycost` = <expr_cache><`dbt3_s001`.`part`.`p_partkey`>((select min(`dbt3_s001`.`partsupp`.`ps_supplycost`) from `dbt3_s001`.`partsupp` join `dbt3_s001`.`supplier` join `dbt3_s001`.`nation` join `dbt3_s001`.`region` where ((`dbt3_s001`.`supplier`.`s_suppkey` = `dbt3_s001`.`partsupp`.`ps_suppkey`) and (`dbt3_s001`.`nation`.`n_nationkey` = `dbt3_s001`.`supplier`.`s_nationkey`) and (`dbt3_s001`.`nation`.`n_regionkey` = `dbt3_s001`.`region`.`r_regionkey`) and (`dbt3_s001`.`region`.`r_name` = 'ASIA') and (`dbt3_s001`.`part`.`p_partkey` = `dbt3_s001`.`partsupp`.`ps_partkey`)))))) order by `dbt3_s001`.`supplier`.`s_acctbal` desc,`dbt3_s001`.`nation`.`n_name`,`dbt3_s001`.`supplier`.`s_name`,`dbt3_s001`.`part`.`p_partkey` +set optimizer_use_condition_selectivity=4; +explain extended +select +s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment +from +part, supplier, partsupp, nation, region +where +p_partkey = ps_partkey +and s_suppkey = ps_suppkey +and p_size = 9 +and p_type like '%TIN' + and s_nationkey = n_nationkey +and n_regionkey = r_regionkey +and r_name = 'ASIA' + and ps_supplycost = ( +select +min(ps_supplycost) +from +partsupp, supplier, nation, region +where +p_partkey = ps_partkey +and s_suppkey = ps_suppkey +and s_nationkey = n_nationkey +and n_regionkey = r_regionkey +and r_name = 'ASIA' + ) +order by +s_acctbal desc, n_name, s_name, p_partkey; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY region ALL PRIMARY NULL NULL NULL 5 20.00 Using where; Using temporary; Using filesort +1 PRIMARY part ALL PRIMARY NULL NULL NULL 200 2.08 Using where; Using join buffer (flat, BNL join) +1 PRIMARY partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 Using where +1 PRIMARY supplier ALL PRIMARY,i_s_nationkey NULL NULL NULL 10 80.00 Using where; Using join buffer (flat, BNL join) +1 PRIMARY nation eq_ref PRIMARY,i_n_regionkey PRIMARY 4 dbt3_s001.supplier.s_nationkey 1 100.00 Using where +2 DEPENDENT SUBQUERY region ALL PRIMARY NULL NULL NULL 5 20.00 Using where +2 DEPENDENT SUBQUERY partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 +2 DEPENDENT SUBQUERY supplier eq_ref PRIMARY,i_s_nationkey PRIMARY 4 dbt3_s001.partsupp.ps_suppkey 1 100.00 Using where +2 DEPENDENT SUBQUERY nation eq_ref PRIMARY,i_n_regionkey PRIMARY 4 dbt3_s001.supplier.s_nationkey 1 100.00 Using where +Warnings: +Note 1276 Field or reference 'dbt3_s001.part.p_partkey' of SELECT #2 was resolved in SELECT #1 +Note 1003 select `dbt3_s001`.`supplier`.`s_acctbal` AS `s_acctbal`,`dbt3_s001`.`supplier`.`s_name` AS `s_name`,`dbt3_s001`.`nation`.`n_name` AS `n_name`,`dbt3_s001`.`part`.`p_partkey` AS `p_partkey`,`dbt3_s001`.`part`.`p_mfgr` AS `p_mfgr`,`dbt3_s001`.`supplier`.`s_address` AS `s_address`,`dbt3_s001`.`supplier`.`s_phone` AS `s_phone`,`dbt3_s001`.`supplier`.`s_comment` AS `s_comment` from `dbt3_s001`.`part` join `dbt3_s001`.`supplier` join `dbt3_s001`.`partsupp` join `dbt3_s001`.`nation` join `dbt3_s001`.`region` where ((`dbt3_s001`.`partsupp`.`ps_partkey` = `dbt3_s001`.`part`.`p_partkey`) and (`dbt3_s001`.`supplier`.`s_suppkey` = `dbt3_s001`.`partsupp`.`ps_suppkey`) and (`dbt3_s001`.`part`.`p_size` = 9) and (`dbt3_s001`.`nation`.`n_nationkey` = `dbt3_s001`.`supplier`.`s_nationkey`) and (`dbt3_s001`.`nation`.`n_regionkey` = `dbt3_s001`.`region`.`r_regionkey`) and (`dbt3_s001`.`region`.`r_name` = 'ASIA') and (`dbt3_s001`.`part`.`p_type` like '%TIN') and (`dbt3_s001`.`partsupp`.`ps_supplycost` = <expr_cache><`dbt3_s001`.`part`.`p_partkey`>((select min(`dbt3_s001`.`partsupp`.`ps_supplycost`) from `dbt3_s001`.`partsupp` join `dbt3_s001`.`supplier` join `dbt3_s001`.`nation` join `dbt3_s001`.`region` where ((`dbt3_s001`.`supplier`.`s_suppkey` = `dbt3_s001`.`partsupp`.`ps_suppkey`) and (`dbt3_s001`.`nation`.`n_nationkey` = `dbt3_s001`.`supplier`.`s_nationkey`) and (`dbt3_s001`.`nation`.`n_regionkey` = `dbt3_s001`.`region`.`r_regionkey`) and (`dbt3_s001`.`region`.`r_name` = 'ASIA') and (`dbt3_s001`.`part`.`p_partkey` = `dbt3_s001`.`partsupp`.`ps_partkey`)))))) order by `dbt3_s001`.`supplier`.`s_acctbal` desc,`dbt3_s001`.`nation`.`n_name`,`dbt3_s001`.`supplier`.`s_name`,`dbt3_s001`.`part`.`p_partkey` === Q15 === create view revenue0 (supplier_no, total_revenue) as select l_suppkey, sum(l_extendedprice * (1 - l_discount)) diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result index e32eea8827d..2dce973a587 100644 --- a/mysql-test/r/selectivity_innodb.result +++ b/mysql-test/r/selectivity_innodb.result @@ -40,6 +40,89 @@ set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivit DROP DATABASE IF EXISTS dbt3_s001; CREATE DATABASE dbt3_s001; use dbt3_s001; +=== Q2 === +set optimizer_use_condition_selectivity=5; +explain extended +select +s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment +from +part, supplier, partsupp, nation, region +where +p_partkey = ps_partkey +and s_suppkey = ps_suppkey +and p_size = 9 +and p_type like '%TIN' + and s_nationkey = n_nationkey +and n_regionkey = r_regionkey +and r_name = 'ASIA' + and ps_supplycost = ( +select +min(ps_supplycost) +from +partsupp, supplier, nation, region +where +p_partkey = ps_partkey +and s_suppkey = ps_suppkey +and s_nationkey = n_nationkey +and n_regionkey = r_regionkey +and r_name = 'ASIA' + ) +order by +s_acctbal desc, n_name, s_name, p_partkey; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY part ALL PRIMARY NULL NULL NULL 200 0.31 Using where; Using temporary; Using filesort +1 PRIMARY region ALL PRIMARY NULL NULL NULL 5 20.00 Using where; Using join buffer (flat, BNL join) +1 PRIMARY partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 Using where +1 PRIMARY supplier ALL PRIMARY,i_s_nationkey NULL NULL NULL 10 80.00 Using where; Using join buffer (flat, BNL join) +1 PRIMARY nation eq_ref PRIMARY,i_n_regionkey PRIMARY 4 dbt3_s001.supplier.s_nationkey 1 100.00 Using where +2 DEPENDENT SUBQUERY region ALL PRIMARY NULL NULL NULL 5 20.00 Using where +2 DEPENDENT SUBQUERY partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 +2 DEPENDENT SUBQUERY supplier eq_ref PRIMARY,i_s_nationkey PRIMARY 4 dbt3_s001.partsupp.ps_suppkey 1 100.00 Using where +2 DEPENDENT SUBQUERY nation eq_ref PRIMARY,i_n_regionkey PRIMARY 4 dbt3_s001.supplier.s_nationkey 1 100.00 Using where +Warnings: +Note 1276 Field or reference 'dbt3_s001.part.p_partkey' of SELECT #2 was resolved in SELECT #1 +Note 1003 select `dbt3_s001`.`supplier`.`s_acctbal` AS `s_acctbal`,`dbt3_s001`.`supplier`.`s_name` AS `s_name`,`dbt3_s001`.`nation`.`n_name` AS `n_name`,`dbt3_s001`.`part`.`p_partkey` AS `p_partkey`,`dbt3_s001`.`part`.`p_mfgr` AS `p_mfgr`,`dbt3_s001`.`supplier`.`s_address` AS `s_address`,`dbt3_s001`.`supplier`.`s_phone` AS `s_phone`,`dbt3_s001`.`supplier`.`s_comment` AS `s_comment` from `dbt3_s001`.`part` join `dbt3_s001`.`supplier` join `dbt3_s001`.`partsupp` join `dbt3_s001`.`nation` join `dbt3_s001`.`region` where ((`dbt3_s001`.`partsupp`.`ps_partkey` = `dbt3_s001`.`part`.`p_partkey`) and (`dbt3_s001`.`supplier`.`s_suppkey` = `dbt3_s001`.`partsupp`.`ps_suppkey`) and (`dbt3_s001`.`part`.`p_size` = 9) and (`dbt3_s001`.`nation`.`n_nationkey` = `dbt3_s001`.`supplier`.`s_nationkey`) and (`dbt3_s001`.`nation`.`n_regionkey` = `dbt3_s001`.`region`.`r_regionkey`) and (`dbt3_s001`.`region`.`r_name` = 'ASIA') and (`dbt3_s001`.`part`.`p_type` like '%TIN') and (`dbt3_s001`.`partsupp`.`ps_supplycost` = <expr_cache><`dbt3_s001`.`part`.`p_partkey`>((select min(`dbt3_s001`.`partsupp`.`ps_supplycost`) from `dbt3_s001`.`partsupp` join `dbt3_s001`.`supplier` join `dbt3_s001`.`nation` join `dbt3_s001`.`region` where ((`dbt3_s001`.`supplier`.`s_suppkey` = `dbt3_s001`.`partsupp`.`ps_suppkey`) and (`dbt3_s001`.`nation`.`n_nationkey` = `dbt3_s001`.`supplier`.`s_nationkey`) and (`dbt3_s001`.`nation`.`n_regionkey` = `dbt3_s001`.`region`.`r_regionkey`) and (`dbt3_s001`.`region`.`r_name` = 'ASIA') and (`dbt3_s001`.`part`.`p_partkey` = `dbt3_s001`.`partsupp`.`ps_partkey`)))))) order by `dbt3_s001`.`supplier`.`s_acctbal` desc,`dbt3_s001`.`nation`.`n_name`,`dbt3_s001`.`supplier`.`s_name`,`dbt3_s001`.`part`.`p_partkey` +set optimizer_use_condition_selectivity=4; +explain extended +select +s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment +from +part, supplier, partsupp, nation, region +where +p_partkey = ps_partkey +and s_suppkey = ps_suppkey +and p_size = 9 +and p_type like '%TIN' + and s_nationkey = n_nationkey +and n_regionkey = r_regionkey +and r_name = 'ASIA' + and ps_supplycost = ( +select +min(ps_supplycost) +from +partsupp, supplier, nation, region +where +p_partkey = ps_partkey +and s_suppkey = ps_suppkey +and s_nationkey = n_nationkey +and n_regionkey = r_regionkey +and r_name = 'ASIA' + ) +order by +s_acctbal desc, n_name, s_name, p_partkey; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY region ALL PRIMARY NULL NULL NULL 5 20.00 Using where; Using temporary; Using filesort +1 PRIMARY part ALL PRIMARY NULL NULL NULL 200 2.08 Using where; Using join buffer (flat, BNL join) +1 PRIMARY partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 Using where +1 PRIMARY supplier ALL PRIMARY,i_s_nationkey NULL NULL NULL 10 80.00 Using where; Using join buffer (flat, BNL join) +1 PRIMARY nation eq_ref PRIMARY,i_n_regionkey PRIMARY 4 dbt3_s001.supplier.s_nationkey 1 100.00 Using where +2 DEPENDENT SUBQUERY region ALL PRIMARY NULL NULL NULL 5 20.00 Using where +2 DEPENDENT SUBQUERY partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 +2 DEPENDENT SUBQUERY supplier eq_ref PRIMARY,i_s_nationkey PRIMARY 4 dbt3_s001.partsupp.ps_suppkey 1 100.00 Using where +2 DEPENDENT SUBQUERY nation eq_ref PRIMARY,i_n_regionkey PRIMARY 4 dbt3_s001.supplier.s_nationkey 1 100.00 Using where +Warnings: +Note 1276 Field or reference 'dbt3_s001.part.p_partkey' of SELECT #2 was resolved in SELECT #1 +Note 1003 select `dbt3_s001`.`supplier`.`s_acctbal` AS `s_acctbal`,`dbt3_s001`.`supplier`.`s_name` AS `s_name`,`dbt3_s001`.`nation`.`n_name` AS `n_name`,`dbt3_s001`.`part`.`p_partkey` AS `p_partkey`,`dbt3_s001`.`part`.`p_mfgr` AS `p_mfgr`,`dbt3_s001`.`supplier`.`s_address` AS `s_address`,`dbt3_s001`.`supplier`.`s_phone` AS `s_phone`,`dbt3_s001`.`supplier`.`s_comment` AS `s_comment` from `dbt3_s001`.`part` join `dbt3_s001`.`supplier` join `dbt3_s001`.`partsupp` join `dbt3_s001`.`nation` join `dbt3_s001`.`region` where ((`dbt3_s001`.`partsupp`.`ps_partkey` = `dbt3_s001`.`part`.`p_partkey`) and (`dbt3_s001`.`supplier`.`s_suppkey` = `dbt3_s001`.`partsupp`.`ps_suppkey`) and (`dbt3_s001`.`part`.`p_size` = 9) and (`dbt3_s001`.`nation`.`n_nationkey` = `dbt3_s001`.`supplier`.`s_nationkey`) and (`dbt3_s001`.`nation`.`n_regionkey` = `dbt3_s001`.`region`.`r_regionkey`) and (`dbt3_s001`.`region`.`r_name` = 'ASIA') and (`dbt3_s001`.`part`.`p_type` like '%TIN') and (`dbt3_s001`.`partsupp`.`ps_supplycost` = <expr_cache><`dbt3_s001`.`part`.`p_partkey`>((select min(`dbt3_s001`.`partsupp`.`ps_supplycost`) from `dbt3_s001`.`partsupp` join `dbt3_s001`.`supplier` join `dbt3_s001`.`nation` join `dbt3_s001`.`region` where ((`dbt3_s001`.`supplier`.`s_suppkey` = `dbt3_s001`.`partsupp`.`ps_suppkey`) and (`dbt3_s001`.`nation`.`n_nationkey` = `dbt3_s001`.`supplier`.`s_nationkey`) and (`dbt3_s001`.`nation`.`n_regionkey` = `dbt3_s001`.`region`.`r_regionkey`) and (`dbt3_s001`.`region`.`r_name` = 'ASIA') and (`dbt3_s001`.`part`.`p_partkey` = `dbt3_s001`.`partsupp`.`ps_partkey`)))))) order by `dbt3_s001`.`supplier`.`s_acctbal` desc,`dbt3_s001`.`nation`.`n_name`,`dbt3_s001`.`supplier`.`s_name`,`dbt3_s001`.`part`.`p_partkey` === Q15 === create view revenue0 (supplier_no, total_revenue) as select l_suppkey, sum(l_extendedprice * (1 - l_discount)) diff --git a/mysql-test/suite/sys_vars/r/all_vars.result b/mysql-test/suite/sys_vars/r/all_vars.result index 1bd4e394f6a..7beccc4d3bf 100644 --- a/mysql-test/suite/sys_vars/r/all_vars.result +++ b/mysql-test/suite/sys_vars/r/all_vars.result @@ -10,5 +10,6 @@ there should be *no* long test name listed below: select distinct variable_name as `there should be *no* variables listed below:` from t2 left join t1 on variable_name=test_name where test_name is null; there should be *no* variables listed below: +optimizer_selectivity_sampling_limit drop table t1; drop table t2; diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test index 56908e50c65..8bde4718fdd 100644 --- a/mysql-test/t/selectivity.test +++ b/mysql-test/t/selectivity.test @@ -56,6 +56,64 @@ customer, lineitem, nation, orders, part, partsupp, region, supplier; --enable_result_log --enable_query_log +--echo === Q2 === + +set optimizer_use_condition_selectivity=5; +explain extended +select + s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment +from + part, supplier, partsupp, nation, region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 9 + and p_type like '%TIN' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, supplier, nation, region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + ) +order by + s_acctbal desc, n_name, s_name, p_partkey; +set optimizer_use_condition_selectivity=4; +explain extended +select + s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment +from + part, supplier, partsupp, nation, region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 9 + and p_type like '%TIN' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, supplier, nation, region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + ) +order by + s_acctbal desc, n_name, s_name, p_partkey; --echo === Q15 === @@ -230,7 +288,6 @@ flush table nation; eval EXPLAIN EXTENDED $Q20; eval $Q20; - DROP DATABASE dbt3_s001; set histogram_type=@save_histogram_type; diff --git a/sql/item.h b/sql/item.h index 50d125f76de..d8ae051b4b5 100644 --- a/sql/item.h +++ b/sql/item.h @@ -548,6 +548,14 @@ typedef bool (Item::*Item_analyzer) (uchar **argp); typedef Item* (Item::*Item_transformer) (uchar *arg); typedef void (*Cond_traverser) (const Item *item, void *arg); +struct st_cond_statistic; + +struct find_selective_predicates_list_processor_data +{ + TABLE *table; + List<st_cond_statistic> list; +}; + class Item_equal; class COND_EQUAL; @@ -1108,6 +1116,11 @@ public: return (this->*processor)(arg); } + virtual bool walk_top_and(Item_processor processor, uchar *arg) + { + return (this->*processor)(arg); + } + virtual Item* transform(Item_transformer transformer, uchar *arg); /* @@ -1174,6 +1187,8 @@ public: return FALSE; } virtual bool exists2in_processor(uchar *opt_arg) { return 0; } + virtual bool find_selective_predicates_list_processor(uchar *opt_arg) + { return 0; } /* To call bool function for all arguments */ struct bool_func_call_args diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index b5b143a2448..7ca85a72cfa 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -4428,6 +4428,16 @@ bool Item_cond::walk(Item_processor processor, bool walk_subquery, uchar *arg) return Item_func::walk(processor, walk_subquery, arg); } +bool Item_cond_and::walk_top_and(Item_processor processor, uchar *arg) +{ + List_iterator_fast<Item> li(list); + Item *item; + while ((item= li++)) + if (item->walk_top_and(processor, arg)) + return 1; + return Item_cond::walk_top_and(processor, arg); +} + /** Transform an Item_cond object with a transformer callback function. @@ -4940,6 +4950,7 @@ bool Item_func_like::fix_fields(THD *thd, Item **ref) turboBM_compute_bad_character_shifts(); DBUG_PRINT("info",("done")); } + use_sampling= ((*first == wild_many || *first == wild_one) && len > 2); } } return FALSE; @@ -4951,6 +4962,28 @@ void Item_func_like::cleanup() Item_bool_func2::cleanup(); } + +bool Item_func_like::find_selective_predicates_list_processor(uchar *arg) +{ + find_selective_predicates_list_processor_data *data= + (find_selective_predicates_list_processor_data *) arg; + if (use_sampling && used_tables() == data->table->map) + { + COND_STATISTIC *stat= (COND_STATISTIC *)sql_alloc(sizeof(COND_STATISTIC)); + if (!stat) + return TRUE; + stat->cond= this; + Item *arg0= args[0]->real_item(); + if (args[1]->const_item() && arg0->type() == FIELD_ITEM) + stat->field_arg= ((Item_field *)arg0)->field; + else + stat->field_arg= NULL; + data->list.push_back(stat); + } + return FALSE; +} + + /** @brief Compile regular expression. diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 81598ba96c8..ab4219564fe 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -1487,8 +1487,9 @@ class Item_func_like :public Item_bool_func2 enum { alphabet_size = 256 }; Item *escape_item; - + bool escape_used_in_parsing; + bool use_sampling; public: int escape; @@ -1496,7 +1497,7 @@ public: Item_func_like(Item *a,Item *b, Item *escape_arg, bool escape_used) :Item_bool_func2(a,b), canDoTurboBM(FALSE), pattern(0), pattern_len(0), bmGs(0), bmBc(0), escape_item(escape_arg), - escape_used_in_parsing(escape_used) {} + escape_used_in_parsing(escape_used), use_sampling(0) {} longlong val_int(); enum Functype functype() const { return LIKE_FUNC; } optimize_type select_optimize() const; @@ -1504,6 +1505,8 @@ public: const char *func_name() const { return "like"; } bool fix_fields(THD *thd, Item **ref); void cleanup(); + + bool find_selective_predicates_list_processor(uchar *arg); }; @@ -1914,6 +1917,7 @@ public: Item *neg_transformer(THD *thd); void mark_as_condition_AND_part(TABLE_LIST *embedding); virtual uint exists2in_reserved_items() { return list.elements; }; + bool walk_top_and(Item_processor processor, uchar *arg); }; inline bool is_cond_and(Item *item) diff --git a/sql/opt_range.cc b/sql/opt_range.cc index c29a888ea6f..b736f898768 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3530,7 +3530,74 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond) } } } - } + } + + /* Calculate selectivity of probably highly selective predicates */ + ulong check_rows= + min(thd->variables.optimizer_selectivity_sampling_limit, + (ulong) (table_records * SELECTIVITY_SAMPLING_SHARE)); + if (cond && check_rows > SELECTIVITY_SAMPLING_THRESHOLD && + thd->variables.optimizer_use_condition_selectivity > 4) + { + find_selective_predicates_list_processor_data *dt= + (find_selective_predicates_list_processor_data *) + alloc_root(thd->mem_root, + sizeof(find_selective_predicates_list_processor_data)); + if (!dt) + DBUG_RETURN(TRUE); + dt->list.empty(); + dt->table= table; + if (cond->walk(&Item::find_selective_predicates_list_processor, 0, + (uchar*) dt)) + DBUG_RETURN(TRUE); + if (dt->list.elements > 0) + { + check_rows= check_selectivity(thd, check_rows, table, &dt->list); + if (check_rows > SELECTIVITY_SAMPLING_THRESHOLD) + { + COND_STATISTIC *stat; + List_iterator_fast<COND_STATISTIC> it(dt->list); + double examined_rows= check_rows; + while ((stat= it++)) + { + if (!stat->positive) + { + DBUG_PRINT("info", ("To avoid 0 assigned 1 to the counter")); + stat->positive= 1; // avoid 0 + } + DBUG_PRINT("info", ("The predicate selectivity : %g", + (double)stat->positive / examined_rows)); + double selectivity= ((double)stat->positive) / examined_rows; + table->cond_selectivity*= selectivity; + /* + If a field is involved then we register its selectivity in case + there in an equality with the field. + For example in case + t1.a LIKE "%bla%" and t1.a = t2.b + the selectivity we have found could be used also for t2. + */ + if (stat->field_arg) + { + stat->field_arg->cond_selectivity*= selectivity; + + if (stat->field_arg->next_equal_field) + { + for (Field *next_field= stat->field_arg->next_equal_field; + next_field != stat->field_arg; + next_field= next_field->next_equal_field) + { + next_field->cond_selectivity*= selectivity; + next_field->table->cond_selectivity*= selectivity; + } + } + } + } + + } + /* This list and its elements put to mem_root so should not be freed */ + table->cond_selectivity_sampling_explain= &dt->list; + } + } DBUG_RETURN(FALSE); } diff --git a/sql/opt_range.h b/sql/opt_range.h index d98bf1186e8..963551cabdb 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -1051,4 +1051,11 @@ void store_key_image_to_rec(Field *field, uchar *ptr, uint len); extern String null_string; +/* check this number of rows (default value) */ +#define SELECTIVITY_SAMPLING_LIMIT 5000 +/* but no more then this part of table (10%) */ +#define SELECTIVITY_SAMPLING_SHARE 0.10 +/* do not check if we are going check less then this number of records */ +#define SELECTIVITY_SAMPLING_THRESHOLD 10 + #endif diff --git a/sql/sql_class.h b/sql/sql_class.h index 8e2bd59da57..bb5b2c4e775 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -505,6 +505,7 @@ typedef struct system_variables ulong net_write_timeout; ulong optimizer_prune_level; ulong optimizer_search_depth; + ulong optimizer_selectivity_sampling_limit; ulong optimizer_use_condition_selectivity; ulong use_stat_tables; ulong histogram_size; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index cc4b139fb2f..a48eff66386 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -23972,6 +23972,78 @@ uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select, return MAX_KEY; } +/* + Count how much times conditions are true for several first rows of the table + + @param thd thread handle + @param rows_to_read how much rows to check + @param table table which should be checked + @conds conds list of conditions and countars for them + + @return number of really checked rows or 0 in case of error or empty table +*/ + +ulong check_selectivity(THD *thd, + ulong rows_to_read, + TABLE *table, + List<COND_STATISTIC> *conds) +{ + ulong count= 0; + COND_STATISTIC *cond; + List_iterator_fast<COND_STATISTIC> it(*conds); + handler *file= table->file; + uchar *record= table->record[0]; + int error= 0; + DBUG_ENTER("check_selectivity"); + + DBUG_ASSERT(rows_to_read > 0); + while ((cond= it++)) + { + DBUG_ASSERT(cond->cond); + DBUG_ASSERT(cond->cond->used_tables() == table->map); + cond->positive= 0; + } + it.rewind(); + + if (file->ha_rnd_init_with_error(1)) + DBUG_RETURN(0); + do + { + error= file->ha_rnd_next(record); + + if (thd->killed) + { + thd->send_kill_message(); + count= 0; + goto err; + } + if (error) + { + if (error == HA_ERR_RECORD_DELETED) + continue; + if (error == HA_ERR_END_OF_FILE) + break; + goto err; + } + + count++; + while ((cond= it++)) + { + if (cond->cond->val_bool()) + cond->positive++; + } + it.rewind(); + + } while (count < rows_to_read); + + file->ha_rnd_end(); + DBUG_RETURN(count); + +err: + DBUG_PRINT("error", ("error %d", error)); + file->ha_rnd_end(); + DBUG_RETURN(0); +} /** @} (end of group Query_Optimizer) diff --git a/sql/sql_select.h b/sql/sql_select.h index 9cda68bd434..950c48d6ea1 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1855,4 +1855,17 @@ void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps); double prev_record_reads(POSITION *positions, uint idx, table_map found_ref); void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist); +struct st_cond_statistic +{ + Item *cond; + Field *field_arg; + ulong positive; +}; +typedef struct st_cond_statistic COND_STATISTIC; + +ulong check_selectivity(THD *thd, + ulong rows_to_read, + TABLE *table, + List<COND_STATISTIC> *conds); + #endif /* SQL_SELECT_INCLUDED */ diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 4393809a6fe..24009cb5a99 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -56,6 +56,7 @@ #endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */ #include "threadpool.h" #include "sql_repl.h" +#include "opt_range.h" /* The rule for this file: everything should be 'static'. When a sys_var @@ -1533,6 +1534,14 @@ static Sys_var_ulong Sys_optimizer_prune_level( SESSION_VAR(optimizer_prune_level), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1), DEFAULT(1), BLOCK_SIZE(1)); +static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit( + "optimizer_selectivity_sampling_limit", + "Controls number of record samples to check condition selectivity", + SESSION_VAR(optimizer_selectivity_sampling_limit), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX), + DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1)); + static Sys_var_ulong Sys_optimizer_use_condition_selectivity( "optimizer_use_condition_selectivity", "Controls selectivity of which conditions the optimizer takes into " @@ -1548,9 +1557,11 @@ static Sys_var_ulong Sys_optimizer_use_condition_selectivity( "not backed by any index to calculate the cardinality of a partial join, " "4 - use histograms to calculate selectivity of range conditions that " "are not backed by any index to calculate the cardinality of " - "a partial join.", + "a partial join." + "5 - additionally use selectivity of certain non-range predicates " + "calculated on record samples", SESSION_VAR(optimizer_use_condition_selectivity), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(1, 4), DEFAULT(1), BLOCK_SIZE(1)); + VALID_RANGE(1, 5), DEFAULT(1), BLOCK_SIZE(1)); /** Warns about deprecated value 63 */ static bool fix_optimizer_search_depth(sys_var *self, THD *thd, diff --git a/sql/table.cc b/sql/table.cc index e88b3453ce9..caed8ed4107 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -3891,6 +3891,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) reginfo.impossible_range= 0; created= TRUE; cond_selectivity= 1.0; + cond_selectivity_sampling_explain= NULL; /* Catch wrong handling of the auto_increment_field_not_null. */ DBUG_ASSERT(!auto_increment_field_not_null); diff --git a/sql/table.h b/sql/table.h index e721d60f892..af79bffd437 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1038,6 +1038,8 @@ enum index_hint_type INDEX_HINT_FORCE }; +struct st_cond_statistic; + #define CHECK_ROW_FOR_NULLS_TO_REJECT (1 << 0) #define REJECT_ROW_DUE_TO_NULL_FIELDS (1 << 1) @@ -1163,6 +1165,7 @@ public: ha_rows quick_condition_rows; double cond_selectivity; + List<st_cond_statistic> *cond_selectivity_sampling_explain; table_map map; /* ID bit of table (1,2,4,8,16...) */ |