summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVarun Gupta <varun.gupta@mariadb.com>2021-02-05 23:54:28 +0530
committerVarun Gupta <varun.gupta@mariadb.com>2021-02-06 14:22:01 +0530
commitcb5223a49f754e52dd2f9f85632c9110b6a70a29 (patch)
treef5d4c48355139349f04ea456c7adbd843965eb35
parent957531a06bb0dd2c082d07c94f6e3f4557c1621f (diff)
downloadmariadb-git-cb5223a49f754e52dd2f9f85632c9110b6a70a29.tar.gz
Using recurisive descent instead of walk to check if the join cardinality estimates are precise
-rw-r--r--mysql-test/main/join_cardinality.result2
-rw-r--r--sql/field.cc6
-rw-r--r--sql/item.cc198
-rw-r--r--sql/item.h9
-rw-r--r--sql/item_cmpfunc.cc91
-rw-r--r--sql/item_cmpfunc.h4
-rw-r--r--sql/sql_select.cc9
7 files changed, 176 insertions, 143 deletions
diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result
index e41438e1a3e..e98de9c087b 100644
--- a/mysql-test/main/join_cardinality.result
+++ b/mysql-test/main/join_cardinality.result
@@ -733,7 +733,7 @@ SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
[
- true
+ false
]
EXPLAIN SELECT * from t1
WHERE t1.a=t1.b AND (abs(t1.a),t1.b) IN ((1,1), (2,2), (3,3));
diff --git a/sql/field.cc b/sql/field.cc
index 3b721d0316d..5d4a3eca93f 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -11403,7 +11403,8 @@ void Field::statistics_available_via_stat_tables()
if (thd->variables.optimizer_use_condition_selectivity > 2 &&
check_eits_preferred(thd))
{
- if (read_stats && !read_stats->no_stat_values_provided())
+ if (table && table->stats_is_read &&
+ read_stats && !read_stats->no_stat_values_provided())
{
// Only need min and max values for a column to get range statistics
if (read_stats->min_max_values_are_provided())
@@ -11502,7 +11503,8 @@ bool Field::is_ndv_available_via_stat_tables()
{
if (check_eits_preferred(get_thd()))
{
- if ((read_stats && !read_stats->no_stat_values_provided() &&
+ if ((table && table->stats_is_read &&
+ read_stats && !read_stats->no_stat_values_provided() &&
!read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY)))
{
stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
diff --git a/sql/item.cc b/sql/item.cc
index 408b8dd7f8b..6cacf27ecce 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -7509,7 +7509,6 @@ Item *Item::build_pushable_cond(THD *thd,
this argument will be a reference to a column that is used either
as the first component of an index or statistics are available via
statistical tables.
-
b. Equalities:
For an equality to have accurate selectivity estimates,
the number of distinct values for each column in the equality
@@ -7521,7 +7520,6 @@ Item *Item::build_pushable_cond(THD *thd,
The number of distinct values for a column can be known by
1) from indexes via rec_per_key
2) from statistical tables via avg_frequency.
-
2. (recursive step)
AND / OR formula over formulas defined in section 1 of the
definition.
@@ -7564,11 +7562,14 @@ Item *Item::build_pushable_cond(THD *thd,
In the end for all fields we may have selectivity from an index or
statistical tables.
+ 3. (optional additional non-recursive step)
+ Conjunction of formulas described in sections 1 and 2.
@notes
- The implementation for this function use the 'walk' method to traverse
- the tree of this item with predicate_selectivity_checker() as the
- call-back parameter of the method.
+ The implementation for this function use the recursive descent to walk
+ over the condition tree. A call-back function
+ predicate_selectivity_checker() is passed as a parameter to this function.
+
propagate_equal_fields() is called before this function is called so
Item_field::item_equal and Item_direct_view_ref::item_equal is set.
@@ -7577,27 +7578,55 @@ Item *Item::build_pushable_cond(THD *thd,
FALSE OTHERWISE
*/
-bool Item::with_accurate_selectivity_estimation()
+bool Item::with_accurate_selectivity_estimation(Predicate_checker checker,
+ void *arg, bool top_level)
{
- /*
- For the below test one could use a virtual function but that would
- take a lot of space for other item as there will be entires in the vtable
- */
- if (type() == Item::COND_ITEM &&
- ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC)
+ if (type() == Item::COND_ITEM)
{
- List_iterator<Item> li(*((Item_cond*) this)->argument_list());
+ Item_cond_and *and_cond=
+ (((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC) ?
+ ((Item_cond_and*) this) : 0;
+
+ List<Item> *arg_list= ((Item_cond*) this)->argument_list();
+ List_iterator<Item> li(*arg_list);
Item *item;
- while ((item= li++))
+
+ if (and_cond)
+ {
+ uint counter=0;
+ while ((item=li++))
+ {
+ SAME_FIELD arg_same_field= {NULL, FALSE};
+ if (item->with_accurate_selectivity_estimation(checker,
+ (top_level ?
+ (void *)&arg_same_field :
+ arg),
+ top_level))
+ {
+ counter++;
+ }
+ }
+
+ if (counter == arg_list->elements)
+ return true;
+ }
+ else
{
- SAME_FIELD arg= {NULL, false};
- if (item->walk(&Item::predicate_selectivity_checker, 0, &arg))
- return false;
+ uint counter=0;
+ while ((item=li++))
+ {
+ if (item->with_accurate_selectivity_estimation(checker, arg, FALSE))
+ {
+ counter++;
+ }
+ }
+ if (counter == arg_list->elements)
+ return true;
}
- return true;
+ return false;
}
- SAME_FIELD arg= {NULL, false};
- return !walk(&Item::predicate_selectivity_checker, 0, &arg);
+
+ return !((this->*checker) (arg));
}
@@ -9327,81 +9356,6 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel)
}
-/*
- @brief
- Checks if a formula of a condition contains the same column
-
- @details
- In the function we try to check if a formula of a condition depends
- (directly or indirectly through equalities inferred from the
- conjuncted multiple equalities) only on one column.
-
- Eg:
- WHERE clause is:
- t1.a=t2.b and (t1.a > 5 or t2.b < 1);
-
- the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of
- equalities to conclude that it depends on one column.
-
- This is used mostly for OR conjuncts where we need to make sure
- that the entire OR conjunct contains only one column, so that we may
- get accurate estimates.
- An example with top level OR conjunct would be:
- WHERE A=1 or A between 100 and 200 or A > 1000
-
- @retval
- TRUE : the formula does not depend on one column
- FALSE : OTHERWISE
-*/
-
-bool Item_field::predicate_selectivity_checker(void *arg)
-{
- SAME_FIELD *same_field_arg= (SAME_FIELD*)arg;
-
- /*
- The same_field_arg is passed as a parameter because when we start walking
- over the condition tree we don't know which column the predicate will be
- dependent on. So as soon as we encounter a leaf of the condition tree
- which is a field item, we set the SAME_FIELD::item to the found
- field item and then compare the rest of the fields in the predicate with
- the field item.
- */
-
- if (same_field_arg->item == NULL)
- {
- same_field_arg->item= this;
- same_field_arg->is_stats_available=
- field->is_range_statistics_available() ||
- (item_equal && item_equal->is_statistics_available());
- return false;
- }
-
- DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM);
- /* Found the same field while traversing the condition tree */
- if (((Item_field*)same_field_arg->item->real_item())->field == field)
- return false;
-
- if (!same_field_arg->item->get_item_equal())
- return true;
-
- return !(same_field_arg->item->get_item_equal() == item_equal);
-}
-
-
-bool Item_direct_view_ref::predicate_selectivity_checker(void *arg)
-{
- SAME_FIELD *same_field_arg= (SAME_FIELD*)arg;
- if (same_field_arg->item == real_item())
- {
- DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM);
- same_field_arg->item= this;
- same_field_arg->is_stats_available|=
- (item_equal && item_equal->is_statistics_available());
- }
- return false;
-}
-
-
bool Item_direct_view_ref::excl_dep_on_table(table_map tab_map)
{
table_map used= used_tables();
@@ -10817,3 +10771,57 @@ bool Item::is_non_const_field_item()
return true;
return false;
}
+
+
+/*
+ @brief
+ Checks if a formula of a condition contains the same column
+
+ @details
+ In the function we try to check if a formula of a condition depends
+ (directly or indirectly through equalities inferred from the
+ conjuncted multiple equalities) only on one column.
+
+ Eg:
+ WHERE clause is:
+ t1.a=t2.b and (t1.a > 5 or t2.b < 1);
+
+ the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of
+ equalities to conclude that it depends on one column.
+
+ This is used mostly for OR conjuncts where we need to make sure
+ that the entire OR conjunct contains only one column, so that we may
+ get accurate estimates.
+ An example with top level OR conjunct would be:
+ WHERE A=1 or A between 100 and 200 or A > 1000
+
+ @retval
+ TRUE : the formula depends on one column
+ FALSE : OTHERWISE
+*/
+
+bool Item::is_resolved_by_same_column(void *arg)
+{
+ SAME_FIELD *same_field_arg= (SAME_FIELD*) arg;
+ if (same_field_arg->item == NULL)
+ {
+ Field *field= ((Item_field *)real_item())->field;
+ same_field_arg->item= this;
+ same_field_arg->is_stats_available=
+ (field->is_range_statistics_available() ||
+ (get_item_equal() &&
+ get_item_equal()->is_statistics_available()));
+ return true;
+ }
+
+ Item *item= same_field_arg->item;
+
+ if (((Item_field*)item->real_item())->field ==
+ ((Item_field*)real_item())->field)
+ return true;
+
+ if (!item->get_item_equal())
+ return false;
+
+ return (item->get_item_equal() == get_item_equal());
+}
diff --git a/sql/item.h b/sql/item.h
index 2dfaa2e260c..52b67bbaf3f 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -624,6 +624,9 @@ typedef bool (Item::*Item_analyzer) (uchar **argp);
typedef Item* (Item::*Item_transformer) (THD *thd, uchar *arg);
typedef void (*Cond_traverser) (const Item *item, void *arg);
typedef bool (Item::*Pushdown_checker) (uchar *arg);
+typedef bool (Item::*Predicate_checker) (void *arg);
+
+
struct st_cond_statistic;
@@ -1998,7 +2001,8 @@ public:
virtual bool find_selective_predicates_list_processor(void *arg) { return 0; }
- bool with_accurate_selectivity_estimation();
+ bool with_accurate_selectivity_estimation(Predicate_checker checker,
+ void *arg, bool top_level);
/*
@brief
@@ -2514,6 +2518,7 @@ public:
*/
bool pushable_equality_checker_for_subquery(uchar *arg);
bool is_non_const_field_item();
+ bool is_resolved_by_same_column(void *arg);
};
MEM_ROOT *get_thd_memroot(THD *thd);
@@ -3621,7 +3626,6 @@ public:
return field->table->pos_in_table_list->outer_join;
}
bool check_index_dependence(void *arg);
- bool predicate_selectivity_checker(void *arg);
friend class Item_default_value;
friend class Item_insert_value;
friend class st_select_lex_unit;
@@ -5992,7 +5996,6 @@ public:
Item *field_transformer_for_having_pushdown(THD *thd, uchar *arg)
{ return this; }
Item *remove_item_direct_ref() { return this; }
- bool predicate_selectivity_checker(void *arg);
};
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 584893536a3..1118fa8dd8d 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -2103,18 +2103,16 @@ bool Item_func_between::count_sargable_conds(void *arg)
return 0;
}
+
bool Item_func_between::predicate_selectivity_checker(void *arg)
{
- SAME_FIELD *field_arg= (SAME_FIELD*)arg;
- if (!field_arg->is_stats_available)
- return true;
-
if (arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
{
- if (is_range_predicate(args[0], args[1]) &&
- is_range_predicate(args[0], args[2]))
- return false;
- return true;
+ if (!(is_range_predicate(args[0], args[1]) &&
+ is_range_predicate(args[0], args[2])))
+ return true;
+ return !(args[0]->is_resolved_by_same_column(arg) &&
+ ((SAME_FIELD*)arg)->is_stats_available);
}
for (uint i= 1 ; i < arg_count ; i++)
@@ -2123,6 +2121,9 @@ bool Item_func_between::predicate_selectivity_checker(void *arg)
{
if (!is_range_predicate(args[i], args[0]))
return true;
+ if (!((args[i]->is_resolved_by_same_column(arg) &&
+ ((SAME_FIELD*)arg)->is_stats_available)))
+ return true;
}
}
return false;
@@ -4314,11 +4315,23 @@ bool Item_func_in::count_sargable_conds(void *arg)
bool Item_func_in::predicate_selectivity_checker(void *arg)
{
- SAME_FIELD *field_arg= (SAME_FIELD*)arg;
- if (!field_arg->is_stats_available)
+ if (!all_items_are_consts(args + 1, arg_count - 1))
return true;
- if (all_items_are_consts(args + 1, arg_count - 1))
- return false;
+
+ if (args[0]->type() == Item::ROW_ITEM)
+ {
+ /*
+ Currently not walking inside the row item to check if all the fields are
+ same or not, that is not a case that users would be using, so just
+ disallowing such predicates and returning that selectivity for the
+ predicate is not available
+ */
+ return true;
+ }
+
+ if (args[0]->is_non_const_field_item())
+ return !(args[0]->is_resolved_by_same_column(arg) &&
+ ((SAME_FIELD*)arg)->is_stats_available);
return true;
}
@@ -5551,13 +5564,10 @@ bool Item_func_null_predicate::count_sargable_conds(void *arg)
bool Item_func_null_predicate::predicate_selectivity_checker(void *arg)
{
- SAME_FIELD *field_arg= (SAME_FIELD*)arg;
- if (!field_arg->is_stats_available)
+ if (!args[0]->is_non_const_field_item())
return true;
-
- if (args[0]->is_non_const_field_item())
- return false;
- return true;
+ return !(args[0]->is_resolved_by_same_column(arg) &&
+ ((SAME_FIELD*)arg)->is_stats_available);
}
@@ -5645,14 +5655,14 @@ bool Item_bool_func2::count_sargable_conds(void *arg)
bool Item_bool_func2::predicate_selectivity_checker(void *arg)
{
- SAME_FIELD *field_arg= (SAME_FIELD*)arg;
- if (!field_arg->is_stats_available)
+ if (!(is_range_predicate(args[0], args[1]) ||
+ is_range_predicate(args[1], args[0])))
return true;
- if (is_range_predicate(args[0], args[1]) ||
- is_range_predicate(args[1], args[0]))
- return false;
- return true;
+ Item *item= args[0]->is_non_const_field_item() ? args[0] : args[1];
+
+ return !(item->is_resolved_by_same_column(arg) &&
+ ((SAME_FIELD*)arg)->is_stats_available);
}
@@ -5763,13 +5773,10 @@ SEL_TREE *Item_func_like::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
bool Item_func_like::predicate_selectivity_checker(void *arg)
{
- SAME_FIELD *field_arg= (SAME_FIELD*)arg;
- if (!field_arg->is_stats_available)
+ if (!with_sargable_pattern())
return true;
-
- if (with_sargable_pattern())
- return false;
- return true;
+ return !(args[0]->is_resolved_by_same_column(arg) &&
+ ((SAME_FIELD*)arg)->is_stats_available);
}
@@ -7243,17 +7250,29 @@ bool Item_equal::predicate_selectivity_checker(void *arg)
field->is_ndv_available()))
return true;
}
- return false;
+ }
+ else
+ {
+ while (it++)
+ {
+ Field *field= it.get_curr_field();
+ field->is_range_statistics_available();
+ if (!field->is_ndv_available())
+ return true;
+ }
}
+ it.rewind();
+ Item *item= (it++);
+ SAME_FIELD *same_field_arg= (SAME_FIELD *) arg;
- while (it++)
+ if (same_field_arg->item == NULL)
{
- Field *field= it.get_curr_field();
- if (!(field->is_ndv_available()))
- return true;
+ item->is_resolved_by_same_column(arg);
+ return false;
}
- return false;
+
+ return !item->is_resolved_by_same_column(arg);
}
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index 3323c4212f6..58dce74a146 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -227,7 +227,6 @@ public:
bool fix_length_and_dec() { decimals=0; max_length=1; return FALSE; }
uint decimal_precision() const { return 1; }
bool need_parentheses_in_default() { return true; }
- bool predicate_selectivity_checker(void *arg) { return TRUE; }
};
@@ -420,6 +419,7 @@ public:
bool top_level);
bool count_sargable_conds(void *arg);
bool predicate_selectivity_checker(void *arg);
+
/*
Specifies which result type the function uses to compare its arguments.
This method is used in equal field propagation.
@@ -3386,7 +3386,6 @@ public:
SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr);
Item *get_copy(THD *thd)
{ return get_item_copy<Item_cond_and>(thd, this); }
- bool predicate_selectivity_checker(void *arg) { return FALSE; }
};
inline bool is_cond_and(Item *item)
@@ -3411,7 +3410,6 @@ public:
Item *neg_transformer(THD *thd);
Item *get_copy(THD *thd)
{ return get_item_copy<Item_cond_or>(thd, this); }
- bool predicate_selectivity_checker(void *arg) { return FALSE; }
};
class Item_func_dyncol_check :public Item_bool_func
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index b56f363f3bf..c894c5120f6 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -8303,10 +8303,13 @@ choose_plan(JOIN *join, table_map join_tables)
Json_writer_object wrapper(thd);
- if (join->conds)
+ if (join->conds)
+ {
+ SAME_FIELD arg= {NULL, false};
wrapper.add("cardinality_accurate",
- join->conds->with_accurate_selectivity_estimation());
-
+ join->conds->with_accurate_selectivity_estimation(&Item::predicate_selectivity_checker,
+ &arg, TRUE));
+ }
Json_writer_array trace_plan(thd,"considered_execution_plans");
if (!join->emb_sjm_nest)