diff options
-rw-r--r-- | mysql-test/main/join_cardinality.result | 52 | ||||
-rw-r--r-- | mysql-test/main/join_cardinality.test | 34 | ||||
-rw-r--r-- | sql/field.cc | 2 | ||||
-rw-r--r-- | sql/item.cc | 25 | ||||
-rw-r--r-- | sql/item.h | 1 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 23 | ||||
-rw-r--r-- | sql/sql_select.cc | 9 |
7 files changed, 132 insertions, 14 deletions
diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result index d825bfa8908..8ef6f761d90 100644 --- a/mysql-test/main/join_cardinality.result +++ b/mysql-test/main/join_cardinality.result @@ -993,5 +993,57 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) SET @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity; SET @@use_stat_tables= @save_use_stat_tables; SET @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity; +# +# For predicates like these a=a, it would be good to have the removed but for these currently +# we assume with such predicates we don't have accurate estimates for selectivity +# +EXPLAIN EXTENDED SELECT * FROM t1 WHERE a = a; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` = `test`.`t1`.`a` +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + false +] +DROP TABLE t1; +# +# Tests where multiple eqaulities have a constant value like a=1 AND a=b +# The muliple equality would be Item_equal(1,a,b), here both having +# statistiscs for the range or ndv is fine +# +CREATE TABLE t1(a INT, b INT, c INT, d INT, KEY(a)); +INSERT INTO t1 SELECT seq, seq,seq,seq from seq_1_to_20; +ANALYZE TABLE t1 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +# DELETING the avg_frequency for column a and b from stat tables +update mysql.column_stats set avg_frequency= NULL +WHERE table_name='t1' AND (column_name='b' OR column_name='a'); +EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=1; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ref a a 5 const 1 100.00 +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d` from `test`.`t1` where `test`.`t1`.`a` = 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=1 AND a=b; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ref a a 5 const 1 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d` from `test`.`t1` where `test`.`t1`.`a` = 1 and `test`.`t1`.`b` = 1 +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] DROP TABLE t1; SET optimizer_switch=@save_optimizer_switch; diff --git a/mysql-test/main/join_cardinality.test b/mysql-test/main/join_cardinality.test index f3b944ea0b0..e71364c40da 100644 --- a/mysql-test/main/join_cardinality.test +++ b/mysql-test/main/join_cardinality.test @@ -340,6 +340,40 @@ SET @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selecti SET @@use_stat_tables= @save_use_stat_tables; SET @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity; +--echo # +--echo # For predicates like these a=a, it would be good to have the removed but for these currently +--echo # we assume with such predicates we don't have accurate estimates for selectivity +--echo # + +EXPLAIN EXTENDED SELECT * FROM t1 WHERE a = a; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +DROP TABLE t1; + +--echo # +--echo # Tests where multiple eqaulities have a constant value like a=1 AND a=b +--echo # The muliple equality would be Item_equal(1,a,b), here both having +--echo # statistiscs for the range or ndv is fine +--echo # + +CREATE TABLE t1(a INT, b INT, c INT, d INT, KEY(a)); +INSERT INTO t1 SELECT seq, seq,seq,seq from seq_1_to_20; + +ANALYZE TABLE t1 PERSISTENT FOR ALL; + +--echo # DELETING the avg_frequency for column a and b from stat tables +update mysql.column_stats set avg_frequency= NULL +WHERE table_name='t1' AND (column_name='b' OR column_name='a'); + +EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=1; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=1 AND a=b; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + DROP TABLE t1; SET optimizer_switch=@save_optimizer_switch; diff --git a/sql/field.cc b/sql/field.cc index 0c097eeb54e..a6c452a91bf 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -11400,7 +11400,7 @@ void Field::statistics_available_via_keys() void Field::statistics_available_via_stat_tables() { THD *thd= get_thd(); - if (thd->variables.optimizer_use_condition_selectivity > 1 && + if (thd->variables.optimizer_use_condition_selectivity > 2 && check_eits_preferred(thd)) { if (read_stats && !read_stats->no_stat_values_provided()) diff --git a/sql/item.cc b/sql/item.cc index 71b73ae314f..34931af2c55 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -7507,7 +7507,8 @@ Item *Item::build_pushable_cond(THD *thd, col op const where op can be >/>=/</<=/=/<> Also the other cases are with [NOT] IN predicate, - [NOT] NULL predicate and LIKE predicate fall in the same category. + [NOT] NULL predicate and LIKE predicate fall + in the same category. The predicate should have only one non-constant argument and this argument will be a reference to a column that is used either as the first component of an index or statistics are available via @@ -7572,7 +7573,8 @@ Item *Item::build_pushable_cond(THD *thd, The implementation for this function use the 'walk' method to traverse the tree of this item with predicate_selectivity_checker() as the call-back parameter of the method. - + propagate_equal_fields() is called before this function is called so + Item_field::item_equal and Item_direct_view_ref::item_equal is set. @retval TRUE selectivity estimates are accurate @@ -7581,6 +7583,10 @@ Item *Item::build_pushable_cond(THD *thd, bool Item::with_accurate_selectivity_estimation() { + /* + For the below test one could use a virtual function but that would + take a lot of space for other item as there will be entires in the vtable + */ if (type() == Item::COND_ITEM && ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC) { @@ -9344,6 +9350,8 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel) This is used mostly for OR conjuncts where we need to make sure that the entire OR conjunct contains only one column, so that we may get accurate estimates. + An example with top level OR conjunct would be: + WHERE A=1 or A between 100 and 200 or A > 1000 @retval TRUE : the formula does not depend on one column @@ -10800,3 +10808,16 @@ bool Item::cleanup_excluding_immutables_processor (void *arg) return false; } } + + +bool Item::is_non_const_field_item() +{ + /* + calling real_item() here so that if the item is a REF_ITEM + then we would get the item field it is referring to + */ + Item *field_item= real_item(); + if (field_item->type() == Item::FIELD_ITEM && !field_item->const_item()) + return true; + return false; +} diff --git a/sql/item.h b/sql/item.h index 84063322a91..89f00d6762b 100644 --- a/sql/item.h +++ b/sql/item.h @@ -2506,6 +2506,7 @@ public: Checks if this item consists in the left part of arg IN subquery predicate */ bool pushable_equality_checker_for_subquery(uchar *arg); + bool is_non_const_field_item(); }; MEM_ROOT *get_thd_memroot(THD *thd); diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 4fe1d78af45..3b46c86bc1c 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -4320,8 +4320,9 @@ bool Item_func_in::predicate_selectivity_checker(void *arg) SAME_FIELD *field_arg= (SAME_FIELD*)arg; if (!field_arg->is_stats_available) return true; - all_items_are_consts(args + 1, arg_count - 1); - return false; + if (all_items_are_consts(args + 1, arg_count - 1)) + return false; + return true; } @@ -5557,7 +5558,7 @@ bool Item_func_null_predicate::predicate_selectivity_checker(void *arg) if (!field_arg->is_stats_available) return true; - if (is_range_predicate(args[0], NULL)) + if (args[0]->is_non_const_field_item()) return false; return true; } @@ -5769,7 +5770,7 @@ bool Item_func_like::predicate_selectivity_checker(void *arg) return true; if (with_sargable_pattern()) - return false; + return false; return true; } @@ -7220,6 +7221,20 @@ bool Item_equal::predicate_selectivity_checker(void *arg) available for all the fields in the multiple equality or not. */ Item_equal_fields_iterator it(*this); + + if (with_const) + { + while (it++) + { + Field *field= it.get_curr_field(); + if (!(field->is_range_statistics_available() || + field->is_ndv_available())) + return true; + } + return false; + } + + while (it++) { Field *field= it.get_curr_field(); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index c04eda05c57..414cab6f511 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -29478,13 +29478,8 @@ void unpack_to_base_table_fields(TABLE *table) bool is_range_predicate(Item *item, Item *value) { - /* - calling real_item() here so that if the item is a REF_ITEM - then we would get the item field it is referring to - */ - Item *field= item->real_item(); - if (field->type() == Item::FIELD_ITEM && !field->const_item() && - (!value || !value->is_expensive())) + if (item->is_non_const_field_item() && + (value->const_item() && !value->is_expensive())) return true; return false; } |