summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/main/join_cardinality.result52
-rw-r--r--mysql-test/main/join_cardinality.test34
-rw-r--r--sql/field.cc2
-rw-r--r--sql/item.cc25
-rw-r--r--sql/item.h1
-rw-r--r--sql/item_cmpfunc.cc23
-rw-r--r--sql/sql_select.cc9
7 files changed, 132 insertions, 14 deletions
diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result
index d825bfa8908..8ef6f761d90 100644
--- a/mysql-test/main/join_cardinality.result
+++ b/mysql-test/main/join_cardinality.result
@@ -993,5 +993,57 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
SET @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
SET @@use_stat_tables= @save_use_stat_tables;
SET @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+#
+# For predicates like these a=a, it would be good to have the removed but for these currently
+# we assume with such predicates we don't have accurate estimates for selectivity
+#
+EXPLAIN EXTENDED SELECT * FROM t1 WHERE a = a;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` = `test`.`t1`.`a`
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ false
+]
+DROP TABLE t1;
+#
+# Tests where multiple eqaulities have a constant value like a=1 AND a=b
+# The muliple equality would be Item_equal(1,a,b), here both having
+# statistiscs for the range or ndv is fine
+#
+CREATE TABLE t1(a INT, b INT, c INT, d INT, KEY(a));
+INSERT INTO t1 SELECT seq, seq,seq,seq from seq_1_to_20;
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status Table is already up to date
+# DELETING the avg_frequency for column a and b from stat tables
+update mysql.column_stats set avg_frequency= NULL
+WHERE table_name='t1' AND (column_name='b' OR column_name='a');
+EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ref a a 5 const 1 100.00
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d` from `test`.`t1` where `test`.`t1`.`a` = 1
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=1 AND a=b;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 ref a a 5 const 1 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d` from `test`.`t1` where `test`.`t1`.`a` = 1 and `test`.`t1`.`b` = 1
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
DROP TABLE t1;
SET optimizer_switch=@save_optimizer_switch;
diff --git a/mysql-test/main/join_cardinality.test b/mysql-test/main/join_cardinality.test
index f3b944ea0b0..e71364c40da 100644
--- a/mysql-test/main/join_cardinality.test
+++ b/mysql-test/main/join_cardinality.test
@@ -340,6 +340,40 @@ SET @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selecti
SET @@use_stat_tables= @save_use_stat_tables;
SET @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+--echo #
+--echo # For predicates like these a=a, it would be good to have the removed but for these currently
+--echo # we assume with such predicates we don't have accurate estimates for selectivity
+--echo #
+
+EXPLAIN EXTENDED SELECT * FROM t1 WHERE a = a;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Tests where multiple eqaulities have a constant value like a=1 AND a=b
+--echo # The muliple equality would be Item_equal(1,a,b), here both having
+--echo # statistiscs for the range or ndv is fine
+--echo #
+
+CREATE TABLE t1(a INT, b INT, c INT, d INT, KEY(a));
+INSERT INTO t1 SELECT seq, seq,seq,seq from seq_1_to_20;
+
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+
+--echo # DELETING the avg_frequency for column a and b from stat tables
+update mysql.column_stats set avg_frequency= NULL
+WHERE table_name='t1' AND (column_name='b' OR column_name='a');
+
+EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=1;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=1 AND a=b;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
DROP TABLE t1;
SET optimizer_switch=@save_optimizer_switch;
diff --git a/sql/field.cc b/sql/field.cc
index 0c097eeb54e..a6c452a91bf 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -11400,7 +11400,7 @@ void Field::statistics_available_via_keys()
void Field::statistics_available_via_stat_tables()
{
THD *thd= get_thd();
- if (thd->variables.optimizer_use_condition_selectivity > 1 &&
+ if (thd->variables.optimizer_use_condition_selectivity > 2 &&
check_eits_preferred(thd))
{
if (read_stats && !read_stats->no_stat_values_provided())
diff --git a/sql/item.cc b/sql/item.cc
index 71b73ae314f..34931af2c55 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -7507,7 +7507,8 @@ Item *Item::build_pushable_cond(THD *thd,
col op const
where op can be >/>=/</<=/=/<>
Also the other cases are with [NOT] IN predicate,
- [NOT] NULL predicate and LIKE predicate fall in the same category.
+ [NOT] NULL predicate and LIKE predicate fall
+ in the same category.
The predicate should have only one non-constant argument and
this argument will be a reference to a column that is used either
as the first component of an index or statistics are available via
@@ -7572,7 +7573,8 @@ Item *Item::build_pushable_cond(THD *thd,
The implementation for this function use the 'walk' method to traverse
the tree of this item with predicate_selectivity_checker() as the
call-back parameter of the method.
-
+ propagate_equal_fields() is called before this function is called so
+ Item_field::item_equal and Item_direct_view_ref::item_equal is set.
@retval
TRUE selectivity estimates are accurate
@@ -7581,6 +7583,10 @@ Item *Item::build_pushable_cond(THD *thd,
bool Item::with_accurate_selectivity_estimation()
{
+ /*
+ For the below test one could use a virtual function but that would
+ take a lot of space for other item as there will be entires in the vtable
+ */
if (type() == Item::COND_ITEM &&
((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC)
{
@@ -9344,6 +9350,8 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel)
This is used mostly for OR conjuncts where we need to make sure
that the entire OR conjunct contains only one column, so that we may
get accurate estimates.
+ An example with top level OR conjunct would be:
+ WHERE A=1 or A between 100 and 200 or A > 1000
@retval
TRUE : the formula does not depend on one column
@@ -10800,3 +10808,16 @@ bool Item::cleanup_excluding_immutables_processor (void *arg)
return false;
}
}
+
+
+bool Item::is_non_const_field_item()
+{
+ /*
+ calling real_item() here so that if the item is a REF_ITEM
+ then we would get the item field it is referring to
+ */
+ Item *field_item= real_item();
+ if (field_item->type() == Item::FIELD_ITEM && !field_item->const_item())
+ return true;
+ return false;
+}
diff --git a/sql/item.h b/sql/item.h
index 84063322a91..89f00d6762b 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -2506,6 +2506,7 @@ public:
Checks if this item consists in the left part of arg IN subquery predicate
*/
bool pushable_equality_checker_for_subquery(uchar *arg);
+ bool is_non_const_field_item();
};
MEM_ROOT *get_thd_memroot(THD *thd);
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 4fe1d78af45..3b46c86bc1c 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -4320,8 +4320,9 @@ bool Item_func_in::predicate_selectivity_checker(void *arg)
SAME_FIELD *field_arg= (SAME_FIELD*)arg;
if (!field_arg->is_stats_available)
return true;
- all_items_are_consts(args + 1, arg_count - 1);
- return false;
+ if (all_items_are_consts(args + 1, arg_count - 1))
+ return false;
+ return true;
}
@@ -5557,7 +5558,7 @@ bool Item_func_null_predicate::predicate_selectivity_checker(void *arg)
if (!field_arg->is_stats_available)
return true;
- if (is_range_predicate(args[0], NULL))
+ if (args[0]->is_non_const_field_item())
return false;
return true;
}
@@ -5769,7 +5770,7 @@ bool Item_func_like::predicate_selectivity_checker(void *arg)
return true;
if (with_sargable_pattern())
- return false;
+ return false;
return true;
}
@@ -7220,6 +7221,20 @@ bool Item_equal::predicate_selectivity_checker(void *arg)
available for all the fields in the multiple equality or not.
*/
Item_equal_fields_iterator it(*this);
+
+ if (with_const)
+ {
+ while (it++)
+ {
+ Field *field= it.get_curr_field();
+ if (!(field->is_range_statistics_available() ||
+ field->is_ndv_available()))
+ return true;
+ }
+ return false;
+ }
+
+
while (it++)
{
Field *field= it.get_curr_field();
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index c04eda05c57..414cab6f511 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -29478,13 +29478,8 @@ void unpack_to_base_table_fields(TABLE *table)
bool is_range_predicate(Item *item, Item *value)
{
- /*
- calling real_item() here so that if the item is a REF_ITEM
- then we would get the item field it is referring to
- */
- Item *field= item->real_item();
- if (field->type() == Item::FIELD_ITEM && !field->const_item() &&
- (!value || !value->is_expensive()))
+ if (item->is_non_const_field_item() &&
+ (value->const_item() && !value->is_expensive()))
return true;
return false;
}