diff options
author | Varun Gupta <varun.gupta@mariadb.com> | 2021-02-05 23:54:28 +0530 |
---|---|---|
committer | Varun Gupta <varun.gupta@mariadb.com> | 2021-02-06 14:22:01 +0530 |
commit | cb5223a49f754e52dd2f9f85632c9110b6a70a29 (patch) | |
tree | f5d4c48355139349f04ea456c7adbd843965eb35 | |
parent | 957531a06bb0dd2c082d07c94f6e3f4557c1621f (diff) | |
download | mariadb-git-cb5223a49f754e52dd2f9f85632c9110b6a70a29.tar.gz |
Using recurisive descent instead of walk to check if the join cardinality estimates are precise
-rw-r--r-- | mysql-test/main/join_cardinality.result | 2 | ||||
-rw-r--r-- | sql/field.cc | 6 | ||||
-rw-r--r-- | sql/item.cc | 198 | ||||
-rw-r--r-- | sql/item.h | 9 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 91 | ||||
-rw-r--r-- | sql/item_cmpfunc.h | 4 | ||||
-rw-r--r-- | sql/sql_select.cc | 9 |
7 files changed, 176 insertions, 143 deletions
diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result index e41438e1a3e..e98de9c087b 100644 --- a/mysql-test/main/join_cardinality.result +++ b/mysql-test/main/join_cardinality.result @@ -733,7 +733,7 @@ SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) [ - true + false ] EXPLAIN SELECT * from t1 WHERE t1.a=t1.b AND (abs(t1.a),t1.b) IN ((1,1), (2,2), (3,3)); diff --git a/sql/field.cc b/sql/field.cc index 3b721d0316d..5d4a3eca93f 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -11403,7 +11403,8 @@ void Field::statistics_available_via_stat_tables() if (thd->variables.optimizer_use_condition_selectivity > 2 && check_eits_preferred(thd)) { - if (read_stats && !read_stats->no_stat_values_provided()) + if (table && table->stats_is_read && + read_stats && !read_stats->no_stat_values_provided()) { // Only need min and max values for a column to get range statistics if (read_stats->min_max_values_are_provided()) @@ -11502,7 +11503,8 @@ bool Field::is_ndv_available_via_stat_tables() { if (check_eits_preferred(get_thd())) { - if ((read_stats && !read_stats->no_stat_values_provided() && + if ((table && table->stats_is_read && + read_stats && !read_stats->no_stat_values_provided() && !read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY))) { stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); diff --git a/sql/item.cc b/sql/item.cc index 408b8dd7f8b..6cacf27ecce 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -7509,7 +7509,6 @@ Item *Item::build_pushable_cond(THD *thd, this argument will be a reference to a column that is used either as the first component of an index or statistics are available via statistical tables. - b. Equalities: For an equality to have accurate selectivity estimates, the number of distinct values for each column in the equality @@ -7521,7 +7520,6 @@ Item *Item::build_pushable_cond(THD *thd, The number of distinct values for a column can be known by 1) from indexes via rec_per_key 2) from statistical tables via avg_frequency. - 2. (recursive step) AND / OR formula over formulas defined in section 1 of the definition. @@ -7564,11 +7562,14 @@ Item *Item::build_pushable_cond(THD *thd, In the end for all fields we may have selectivity from an index or statistical tables. + 3. (optional additional non-recursive step) + Conjunction of formulas described in sections 1 and 2. @notes - The implementation for this function use the 'walk' method to traverse - the tree of this item with predicate_selectivity_checker() as the - call-back parameter of the method. + The implementation for this function use the recursive descent to walk + over the condition tree. A call-back function + predicate_selectivity_checker() is passed as a parameter to this function. + propagate_equal_fields() is called before this function is called so Item_field::item_equal and Item_direct_view_ref::item_equal is set. @@ -7577,27 +7578,55 @@ Item *Item::build_pushable_cond(THD *thd, FALSE OTHERWISE */ -bool Item::with_accurate_selectivity_estimation() +bool Item::with_accurate_selectivity_estimation(Predicate_checker checker, + void *arg, bool top_level) { - /* - For the below test one could use a virtual function but that would - take a lot of space for other item as there will be entires in the vtable - */ - if (type() == Item::COND_ITEM && - ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC) + if (type() == Item::COND_ITEM) { - List_iterator<Item> li(*((Item_cond*) this)->argument_list()); + Item_cond_and *and_cond= + (((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC) ? + ((Item_cond_and*) this) : 0; + + List<Item> *arg_list= ((Item_cond*) this)->argument_list(); + List_iterator<Item> li(*arg_list); Item *item; - while ((item= li++)) + + if (and_cond) + { + uint counter=0; + while ((item=li++)) + { + SAME_FIELD arg_same_field= {NULL, FALSE}; + if (item->with_accurate_selectivity_estimation(checker, + (top_level ? + (void *)&arg_same_field : + arg), + top_level)) + { + counter++; + } + } + + if (counter == arg_list->elements) + return true; + } + else { - SAME_FIELD arg= {NULL, false}; - if (item->walk(&Item::predicate_selectivity_checker, 0, &arg)) - return false; + uint counter=0; + while ((item=li++)) + { + if (item->with_accurate_selectivity_estimation(checker, arg, FALSE)) + { + counter++; + } + } + if (counter == arg_list->elements) + return true; } - return true; + return false; } - SAME_FIELD arg= {NULL, false}; - return !walk(&Item::predicate_selectivity_checker, 0, &arg); + + return !((this->*checker) (arg)); } @@ -9327,81 +9356,6 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel) } -/* - @brief - Checks if a formula of a condition contains the same column - - @details - In the function we try to check if a formula of a condition depends - (directly or indirectly through equalities inferred from the - conjuncted multiple equalities) only on one column. - - Eg: - WHERE clause is: - t1.a=t2.b and (t1.a > 5 or t2.b < 1); - - the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of - equalities to conclude that it depends on one column. - - This is used mostly for OR conjuncts where we need to make sure - that the entire OR conjunct contains only one column, so that we may - get accurate estimates. - An example with top level OR conjunct would be: - WHERE A=1 or A between 100 and 200 or A > 1000 - - @retval - TRUE : the formula does not depend on one column - FALSE : OTHERWISE -*/ - -bool Item_field::predicate_selectivity_checker(void *arg) -{ - SAME_FIELD *same_field_arg= (SAME_FIELD*)arg; - - /* - The same_field_arg is passed as a parameter because when we start walking - over the condition tree we don't know which column the predicate will be - dependent on. So as soon as we encounter a leaf of the condition tree - which is a field item, we set the SAME_FIELD::item to the found - field item and then compare the rest of the fields in the predicate with - the field item. - */ - - if (same_field_arg->item == NULL) - { - same_field_arg->item= this; - same_field_arg->is_stats_available= - field->is_range_statistics_available() || - (item_equal && item_equal->is_statistics_available()); - return false; - } - - DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM); - /* Found the same field while traversing the condition tree */ - if (((Item_field*)same_field_arg->item->real_item())->field == field) - return false; - - if (!same_field_arg->item->get_item_equal()) - return true; - - return !(same_field_arg->item->get_item_equal() == item_equal); -} - - -bool Item_direct_view_ref::predicate_selectivity_checker(void *arg) -{ - SAME_FIELD *same_field_arg= (SAME_FIELD*)arg; - if (same_field_arg->item == real_item()) - { - DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM); - same_field_arg->item= this; - same_field_arg->is_stats_available|= - (item_equal && item_equal->is_statistics_available()); - } - return false; -} - - bool Item_direct_view_ref::excl_dep_on_table(table_map tab_map) { table_map used= used_tables(); @@ -10817,3 +10771,57 @@ bool Item::is_non_const_field_item() return true; return false; } + + +/* + @brief + Checks if a formula of a condition contains the same column + + @details + In the function we try to check if a formula of a condition depends + (directly or indirectly through equalities inferred from the + conjuncted multiple equalities) only on one column. + + Eg: + WHERE clause is: + t1.a=t2.b and (t1.a > 5 or t2.b < 1); + + the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of + equalities to conclude that it depends on one column. + + This is used mostly for OR conjuncts where we need to make sure + that the entire OR conjunct contains only one column, so that we may + get accurate estimates. + An example with top level OR conjunct would be: + WHERE A=1 or A between 100 and 200 or A > 1000 + + @retval + TRUE : the formula depends on one column + FALSE : OTHERWISE +*/ + +bool Item::is_resolved_by_same_column(void *arg) +{ + SAME_FIELD *same_field_arg= (SAME_FIELD*) arg; + if (same_field_arg->item == NULL) + { + Field *field= ((Item_field *)real_item())->field; + same_field_arg->item= this; + same_field_arg->is_stats_available= + (field->is_range_statistics_available() || + (get_item_equal() && + get_item_equal()->is_statistics_available())); + return true; + } + + Item *item= same_field_arg->item; + + if (((Item_field*)item->real_item())->field == + ((Item_field*)real_item())->field) + return true; + + if (!item->get_item_equal()) + return false; + + return (item->get_item_equal() == get_item_equal()); +} diff --git a/sql/item.h b/sql/item.h index 2dfaa2e260c..52b67bbaf3f 100644 --- a/sql/item.h +++ b/sql/item.h @@ -624,6 +624,9 @@ typedef bool (Item::*Item_analyzer) (uchar **argp); typedef Item* (Item::*Item_transformer) (THD *thd, uchar *arg); typedef void (*Cond_traverser) (const Item *item, void *arg); typedef bool (Item::*Pushdown_checker) (uchar *arg); +typedef bool (Item::*Predicate_checker) (void *arg); + + struct st_cond_statistic; @@ -1998,7 +2001,8 @@ public: virtual bool find_selective_predicates_list_processor(void *arg) { return 0; } - bool with_accurate_selectivity_estimation(); + bool with_accurate_selectivity_estimation(Predicate_checker checker, + void *arg, bool top_level); /* @brief @@ -2514,6 +2518,7 @@ public: */ bool pushable_equality_checker_for_subquery(uchar *arg); bool is_non_const_field_item(); + bool is_resolved_by_same_column(void *arg); }; MEM_ROOT *get_thd_memroot(THD *thd); @@ -3621,7 +3626,6 @@ public: return field->table->pos_in_table_list->outer_join; } bool check_index_dependence(void *arg); - bool predicate_selectivity_checker(void *arg); friend class Item_default_value; friend class Item_insert_value; friend class st_select_lex_unit; @@ -5992,7 +5996,6 @@ public: Item *field_transformer_for_having_pushdown(THD *thd, uchar *arg) { return this; } Item *remove_item_direct_ref() { return this; } - bool predicate_selectivity_checker(void *arg); }; diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 584893536a3..1118fa8dd8d 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -2103,18 +2103,16 @@ bool Item_func_between::count_sargable_conds(void *arg) return 0; } + bool Item_func_between::predicate_selectivity_checker(void *arg) { - SAME_FIELD *field_arg= (SAME_FIELD*)arg; - if (!field_arg->is_stats_available) - return true; - if (arguments()[0]->real_item()->type() == Item::FIELD_ITEM) { - if (is_range_predicate(args[0], args[1]) && - is_range_predicate(args[0], args[2])) - return false; - return true; + if (!(is_range_predicate(args[0], args[1]) && + is_range_predicate(args[0], args[2]))) + return true; + return !(args[0]->is_resolved_by_same_column(arg) && + ((SAME_FIELD*)arg)->is_stats_available); } for (uint i= 1 ; i < arg_count ; i++) @@ -2123,6 +2121,9 @@ bool Item_func_between::predicate_selectivity_checker(void *arg) { if (!is_range_predicate(args[i], args[0])) return true; + if (!((args[i]->is_resolved_by_same_column(arg) && + ((SAME_FIELD*)arg)->is_stats_available))) + return true; } } return false; @@ -4314,11 +4315,23 @@ bool Item_func_in::count_sargable_conds(void *arg) bool Item_func_in::predicate_selectivity_checker(void *arg) { - SAME_FIELD *field_arg= (SAME_FIELD*)arg; - if (!field_arg->is_stats_available) + if (!all_items_are_consts(args + 1, arg_count - 1)) return true; - if (all_items_are_consts(args + 1, arg_count - 1)) - return false; + + if (args[0]->type() == Item::ROW_ITEM) + { + /* + Currently not walking inside the row item to check if all the fields are + same or not, that is not a case that users would be using, so just + disallowing such predicates and returning that selectivity for the + predicate is not available + */ + return true; + } + + if (args[0]->is_non_const_field_item()) + return !(args[0]->is_resolved_by_same_column(arg) && + ((SAME_FIELD*)arg)->is_stats_available); return true; } @@ -5551,13 +5564,10 @@ bool Item_func_null_predicate::count_sargable_conds(void *arg) bool Item_func_null_predicate::predicate_selectivity_checker(void *arg) { - SAME_FIELD *field_arg= (SAME_FIELD*)arg; - if (!field_arg->is_stats_available) + if (!args[0]->is_non_const_field_item()) return true; - - if (args[0]->is_non_const_field_item()) - return false; - return true; + return !(args[0]->is_resolved_by_same_column(arg) && + ((SAME_FIELD*)arg)->is_stats_available); } @@ -5645,14 +5655,14 @@ bool Item_bool_func2::count_sargable_conds(void *arg) bool Item_bool_func2::predicate_selectivity_checker(void *arg) { - SAME_FIELD *field_arg= (SAME_FIELD*)arg; - if (!field_arg->is_stats_available) + if (!(is_range_predicate(args[0], args[1]) || + is_range_predicate(args[1], args[0]))) return true; - if (is_range_predicate(args[0], args[1]) || - is_range_predicate(args[1], args[0])) - return false; - return true; + Item *item= args[0]->is_non_const_field_item() ? args[0] : args[1]; + + return !(item->is_resolved_by_same_column(arg) && + ((SAME_FIELD*)arg)->is_stats_available); } @@ -5763,13 +5773,10 @@ SEL_TREE *Item_func_like::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) bool Item_func_like::predicate_selectivity_checker(void *arg) { - SAME_FIELD *field_arg= (SAME_FIELD*)arg; - if (!field_arg->is_stats_available) + if (!with_sargable_pattern()) return true; - - if (with_sargable_pattern()) - return false; - return true; + return !(args[0]->is_resolved_by_same_column(arg) && + ((SAME_FIELD*)arg)->is_stats_available); } @@ -7243,17 +7250,29 @@ bool Item_equal::predicate_selectivity_checker(void *arg) field->is_ndv_available())) return true; } - return false; + } + else + { + while (it++) + { + Field *field= it.get_curr_field(); + field->is_range_statistics_available(); + if (!field->is_ndv_available()) + return true; + } } + it.rewind(); + Item *item= (it++); + SAME_FIELD *same_field_arg= (SAME_FIELD *) arg; - while (it++) + if (same_field_arg->item == NULL) { - Field *field= it.get_curr_field(); - if (!(field->is_ndv_available())) - return true; + item->is_resolved_by_same_column(arg); + return false; } - return false; + + return !item->is_resolved_by_same_column(arg); } diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 3323c4212f6..58dce74a146 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -227,7 +227,6 @@ public: bool fix_length_and_dec() { decimals=0; max_length=1; return FALSE; } uint decimal_precision() const { return 1; } bool need_parentheses_in_default() { return true; } - bool predicate_selectivity_checker(void *arg) { return TRUE; } }; @@ -420,6 +419,7 @@ public: bool top_level); bool count_sargable_conds(void *arg); bool predicate_selectivity_checker(void *arg); + /* Specifies which result type the function uses to compare its arguments. This method is used in equal field propagation. @@ -3386,7 +3386,6 @@ public: SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr); Item *get_copy(THD *thd) { return get_item_copy<Item_cond_and>(thd, this); } - bool predicate_selectivity_checker(void *arg) { return FALSE; } }; inline bool is_cond_and(Item *item) @@ -3411,7 +3410,6 @@ public: Item *neg_transformer(THD *thd); Item *get_copy(THD *thd) { return get_item_copy<Item_cond_or>(thd, this); } - bool predicate_selectivity_checker(void *arg) { return FALSE; } }; class Item_func_dyncol_check :public Item_bool_func diff --git a/sql/sql_select.cc b/sql/sql_select.cc index b56f363f3bf..c894c5120f6 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8303,10 +8303,13 @@ choose_plan(JOIN *join, table_map join_tables) Json_writer_object wrapper(thd); - if (join->conds) + if (join->conds) + { + SAME_FIELD arg= {NULL, false}; wrapper.add("cardinality_accurate", - join->conds->with_accurate_selectivity_estimation()); - + join->conds->with_accurate_selectivity_estimation(&Item::predicate_selectivity_checker, + &arg, TRUE)); + } Json_writer_array trace_plan(thd,"considered_execution_plans"); if (!join->emb_sjm_nest) |