7 files changed, 132 insertions, 14 deletions
diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result
index d825bfa8908..8ef6f761d90 100644
--- a/mysql-test/main/join_cardinality.result
+++ b/mysql-test/main/join_cardinality.result
@@ -993,5 +993,57 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
 SET @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
 SET @@use_stat_tables= @save_use_stat_tables;
 SET @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+#
+# For predicates like these a=a, it would be good to have the removed but for these currently
+# we assume with such predicates we don't have accurate estimates for selectivity
+#
+EXPLAIN EXTENDED SELECT * FROM t1 WHERE a = a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10	100.00	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` = `test`.`t1`.`a`
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+    false
+]
+DROP TABLE t1;
+#
+# Tests where multiple eqaulities have a constant value like a=1 AND a=b
+# The muliple equality would be Item_equal(1,a,b), here both having
+# statistiscs for the range or ndv is fine
+#
+CREATE TABLE t1(a INT, b INT, c INT, d INT, KEY(a));
+INSERT INTO t1 SELECT seq, seq,seq,seq from seq_1_to_20;
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	Table is already up to date
+# DELETING the avg_frequency for column a and b from stat tables
+update mysql.column_stats set avg_frequency= NULL
+WHERE table_name='t1' AND (column_name='b' OR column_name='a');
+EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ref	a	a	5	const	1	100.00	
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d` from `test`.`t1` where `test`.`t1`.`a` = 1
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+    true
+]
+EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=1 AND a=b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ref	a	a	5	const	1	100.00	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c`,`test`.`t1`.`d` AS `d` from `test`.`t1` where `test`.`t1`.`a` = 1 and `test`.`t1`.`b` = 1
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+    true
+]
 DROP TABLE t1;
 SET optimizer_switch=@save_optimizer_switch;
diff --git a/mysql-test/main/join_cardinality.test b/mysql-test/main/join_cardinality.test
index f3b944ea0b0..e71364c40da 100644
--- a/mysql-test/main/join_cardinality.test
+++ b/mysql-test/main/join_cardinality.test
@@ -340,6 +340,40 @@ SET @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selecti
 SET @@use_stat_tables= @save_use_stat_tables;
 SET @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
 
+--echo #
+--echo # For predicates like these a=a, it would be good to have the removed but for these currently
+--echo # we assume with such predicates we don't have accurate estimates for selectivity
+--echo #
+
+EXPLAIN EXTENDED SELECT * FROM t1 WHERE a = a;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+DROP TABLE t1;
+
+--echo #
+--echo # Tests where multiple eqaulities have a constant value like a=1 AND a=b
+--echo # The muliple equality would be Item_equal(1,a,b), here both having
+--echo # statistiscs for the range or ndv is fine
+--echo #
+
+CREATE TABLE t1(a INT, b INT, c INT, d INT, KEY(a));
+INSERT INTO t1 SELECT seq, seq,seq,seq from seq_1_to_20;
+
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+
+--echo # DELETING the avg_frequency for column a and b from stat tables
+update mysql.column_stats set avg_frequency= NULL
+WHERE table_name='t1' AND (column_name='b' OR column_name='a');
+
+EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=1;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=1 AND a=b;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
 DROP TABLE t1;
 
 SET optimizer_switch=@save_optimizer_switch;
diff --git a/sql/field.cc b/sql/field.cc
index 0c097eeb54e..a6c452a91bf 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -11400,7 +11400,7 @@ void Field::statistics_available_via_keys()
 void Field::statistics_available_via_stat_tables()
 {
   THD *thd= get_thd();
-  if (thd->variables.optimizer_use_condition_selectivity > 1 &&
+  if (thd->variables.optimizer_use_condition_selectivity > 2 &&
       check_eits_preferred(thd))
   {
     if (read_stats && !read_stats->no_stat_values_provided())
diff --git a/sql/item.cc b/sql/item.cc
index 71b73ae314f..34931af2c55 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -7507,7 +7507,8 @@ Item *Item::build_pushable_cond(THD *thd,
                col op const
               where op can be  >/>=/</<=/=/<>
               Also the other cases are with [NOT] IN predicate,
-              [NOT] NULL predicate and  LIKE predicate fall in the same category.
+              [NOT] NULL predicate and  LIKE predicate fall
+              in the same category.
             The predicate should have only one non-constant argument and
             this argument will be a reference to a column that is used either
             as the first component of an index or statistics are available via
@@ -7572,7 +7573,8 @@ Item *Item::build_pushable_cond(THD *thd,
     The implementation for this function use the 'walk' method to traverse
     the tree of this item with predicate_selectivity_checker() as the
     call-back parameter of the method.
-
+    propagate_equal_fields() is called before this function is called so
+    Item_field::item_equal and Item_direct_view_ref::item_equal is set.
 
   @retval
     TRUE         selectivity estimates are accurate
@@ -7581,6 +7583,10 @@ Item *Item::build_pushable_cond(THD *thd,
 
 bool Item::with_accurate_selectivity_estimation()
 {
+  /*
+    For the below test one could use a virtual function but that would
+    take a lot of space for other item as there will be entires in the vtable
+  */
   if (type() == Item::COND_ITEM &&
       ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC)
   {
@@ -9344,6 +9350,8 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel)
     This is used mostly for OR conjuncts where we need to make sure
     that the entire OR conjunct contains only one column, so that we may
     get accurate estimates.
+    An example with top level OR conjunct would be:
+      WHERE A=1 or A between 100 and 200 or A > 1000
 
   @retval
     TRUE   : the formula does not depend on one column
@@ -10800,3 +10808,16 @@ bool Item::cleanup_excluding_immutables_processor (void *arg)
     return false;
   }
 }
+
+
+bool Item::is_non_const_field_item()
+{
+  /*
+    calling real_item() here so that if the item is a REF_ITEM
+    then we would get the item field it is referring to
+  */
+  Item *field_item= real_item();
+  if (field_item->type() == Item::FIELD_ITEM && !field_item->const_item())
+    return true;
+  return false;
+}
diff --git a/sql/item.h b/sql/item.h
index 84063322a91..89f00d6762b 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -2506,6 +2506,7 @@ public:
     Checks if this item consists in the left part of arg IN subquery predicate
   */
   bool pushable_equality_checker_for_subquery(uchar *arg);
+  bool is_non_const_field_item();
 };
 
 MEM_ROOT *get_thd_memroot(THD *thd);
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 4fe1d78af45..3b46c86bc1c 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -4320,8 +4320,9 @@ bool Item_func_in::predicate_selectivity_checker(void *arg)
   SAME_FIELD *field_arg= (SAME_FIELD*)arg;
   if (!field_arg->is_stats_available)
     return true;
-  all_items_are_consts(args + 1, arg_count - 1);
-  return false;
+  if (all_items_are_consts(args + 1, arg_count - 1))
+    return false;
+  return true;
 }
 
 
@@ -5557,7 +5558,7 @@ bool Item_func_null_predicate::predicate_selectivity_checker(void *arg)
   if (!field_arg->is_stats_available)
     return true;
 
-  if (is_range_predicate(args[0], NULL))
+  if (args[0]->is_non_const_field_item())
     return false;
   return true;
 }
@@ -5769,7 +5770,7 @@ bool Item_func_like::predicate_selectivity_checker(void *arg)
     return true;
 
   if (with_sargable_pattern())
-      return false;
+    return false;
   return true;
 }
 
@@ -7220,6 +7221,20 @@ bool Item_equal::predicate_selectivity_checker(void *arg)
     available for all the fields in the multiple equality or not.
   */
   Item_equal_fields_iterator it(*this);
+
+  if (with_const)
+  {
+    while (it++)
+    {
+      Field *field= it.get_curr_field();
+      if (!(field->is_range_statistics_available() ||
+            field->is_ndv_available()))
+        return true;
+    }
+    return false;
+  }
+
+
   while (it++)
   {
     Field *field= it.get_curr_field();
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index c04eda05c57..414cab6f511 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -29478,13 +29478,8 @@ void unpack_to_base_table_fields(TABLE *table)
 
 bool is_range_predicate(Item *item, Item *value)
 {
-  /*
-    calling real_item() here so that if the item is a REF_ITEM
-    then we would get the item field it is referring to
-  */
-  Item *field= item->real_item();
-  if (field->type() == Item::FIELD_ITEM && !field->const_item() &&
-      (!value || !value->is_expensive()))
+  if (item->is_non_const_field_item() &&
+     (value->const_item() && !value->is_expensive()))
     return true;
   return false;
 }