From da5214831d3d0f0880b4068a06ce062ca703293f Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 17 May 2012 13:46:05 +0300 Subject: Fix for bug lp:944706, task MDEV-193 The patch enables back constant subquery execution during query optimization after it was disabled during the development of MWL#89 (cost-based choice of IN-TO-EXISTS vs MATERIALIZATION). The main idea is that constant subqueries are allowed to be executed during optimization if their execution is not expensive. The approach is as follows: - Constant subqueries are recursively optimized in the beginning of JOIN::optimize of the outer query. This is done by the new method JOIN::optimize_constant_subqueries(). This is done so that the cost of executing these queries can be estimated. - Optimization of the outer query proceeds normally. During this phase the optimizer may request execution of non-expensive constant subqueries. Each place where the optimizer may potentially execute an expensive expression is guarded with the predicate Item::is_expensive(). - The implementation of Item_subselect::is_expensive has been extended to use the number of examined rows (estimated by the optimizer) as a way to determine whether the subquery is expensive or not. - The new system variable "expensive_subquery_limit" controls how many examined rows are considered to be not expensive. The default is 100. In addition, multiple changes were needed to make this solution work in the light of the changes made by MWL#89. These changes were needed to fix various crashes and wrong results, and legacy bugs discovered during development. --- sql/item_subselect.cc | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'sql/item_subselect.cc') diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 276f35fe301..1367037d17e 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -522,6 +522,48 @@ void Item_subselect::recalc_used_tables(st_select_lex *new_parent, */ } + +/** + Determine if a subquery is expensive to execute during query optimization. + + @details The cost of execution of a subquery is estimated based on an + estimate of the number of rows the subquery will access during execution. + This measure is used instead of JOIN::read_time, because it is considered + to be much more reliable than the cost estimate. + + @return true if the subquery is expensive + @return false otherwise +*/ +bool Item_subselect::is_expensive() +{ + double examined_rows= 0; + + for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select()) + { + JOIN *cur_join= sl->join; + if (!cur_join) + continue; + + /* If a subquery is not optimized we cannot estimate its cost. */ + if (!cur_join->join_tab) + return true; + + if (sl->first_inner_unit()) + { + /* + Subqueries that contain subqueries are considered expensive. + @todo: accumulate the cost of subqueries. + */ + return true; + } + + examined_rows+= cur_join->get_examined_rows(); + } + + return (examined_rows > thd->variables.expensive_subquery_limit); +} + + bool Item_subselect::walk(Item_processor processor, bool walk_subquery, uchar *argument) { -- cgit v1.2.1 From 88d3d853f4802ea48bcbe0b017d9e2403895632d Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 14 Jun 2012 17:03:09 +0300 Subject: Fix bug lp:1008773 Analysis: Queries with implicit grouping (there is aggregate, but no group by) follow some non-obvious semantics in the case of empty result set. Aggregate functions produce some special "natural" value depending on the function. For instance MIN/MAX return NULL, COUNT returns 0. The complexity comes from non-aggregate expressions in the select list. If the non-aggregate expression is a constant, it can be computed, so we should return its value, however if the expression is non-constant, and depends on columns from the empty result set, then the only meaningful value is NULL. The cause of the wrong result was that for subqueries the optimizer didn't make a difference between constant and non-constant ones in the case of empty result for implicit grouping. Solution: In all implementations of Item_subselect::no_rows_in_result() check if the subquery predicate is constant. If it is constant, do not set it to the default value for implicit grouping, instead let it be evaluated. --- sql/item_subselect.cc | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'sql/item_subselect.cc') diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index cae1a9f7541..5af8eb9ebc9 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -892,6 +892,8 @@ void Item_maxmin_subselect::print(String *str, enum_query_type query_type) void Item_maxmin_subselect::no_rows_in_result() { + if (const_item()) + return; value= Item_cache::get_cache(new Item_null()); null_value= 0; was_values= 0; @@ -901,6 +903,8 @@ void Item_maxmin_subselect::no_rows_in_result() void Item_singlerow_subselect::no_rows_in_result() { + if (const_item()) + return; value= Item_cache::get_cache(new Item_null()); reset(); make_const(); @@ -1363,6 +1367,8 @@ Item* Item_exists_subselect::expr_cache_insert_transformer(uchar *thd_arg) void Item_exists_subselect::no_rows_in_result() { + if (const_item()) + return; value= 0; null_value= 0; make_const(); @@ -2707,6 +2713,8 @@ void Item_allany_subselect::print(String *str, enum_query_type query_type) void Item_allany_subselect::no_rows_in_result() { + if (const_item()) + return; value= 0; null_value= 0; was_null= 0; -- cgit v1.2.1 From db6dbadb5a9edd9e93398b6afe8e3196eb768e0a Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 15 Jun 2012 11:33:24 +0300 Subject: Fix bug lp:1008686 Analysis: The fix for bug lp:985667 implements the method Item_subselect::no_rows_in_result() for all main kinds of subqueries. The purpose of this method is to be called from return_zero_rows() and set Items to some default value in the case when a query returns no rows. Aggregates and subqueries require special treatment in this case. Every implementation of Item_subselect::no_rows_in_result() called Item_subselect::make_const() to set the subquery predicate to its default value irrespective of where the predicate was located in the query. Once the predicate was set to a constant it was never executed. At the same time, the JOIN object of the fake select for UNIONs (the one used for the final result of the UNION), was set after all subqueries in the union were executed. Since we set the subquery as constant, it was never executed, and the corresponding JOIN was never created. In order to decide whether the result of NOT IN is NULL or FALSE, Item_in_optimizer needs to check if the subquery result was empty or not. This is where we got the crash, because subselect_union_engine::no_rows() checks for unit->fake_select_lex->join->send_records, and the join object was NULL. Solution: If a subquery is in the HAVING clause it must be evaluated in order to know its result, so that we can properly filter the result records. Once subqueries in the HAVING clause are executed even in the case of no result rows, this specific crash will be solved, because the UNION will be executed, and its JOIN will be constructed. Therefore the fix for this crash is to narrow the fix for lp:985667, and to apply Item_subselect::no_rows_in_result() only when the subquery predicate is in the SELECT clause. --- sql/item_subselect.cc | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'sql/item_subselect.cc') diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 5af8eb9ebc9..16f754575f5 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -892,7 +892,14 @@ void Item_maxmin_subselect::print(String *str, enum_query_type query_type) void Item_maxmin_subselect::no_rows_in_result() { - if (const_item()) + /* + Subquery predicates outside of the SELECT list must be evaluated in order + to possibly filter the special result row generated for implicit grouping + if the subquery is in the HAVING clause. + If the predicate is constant, we need its actual value in the only result + row for queries with implicit grouping. + */ + if (parsing_place != SELECT_LIST || const_item()) return; value= Item_cache::get_cache(new Item_null()); null_value= 0; @@ -903,7 +910,14 @@ void Item_maxmin_subselect::no_rows_in_result() void Item_singlerow_subselect::no_rows_in_result() { - if (const_item()) + /* + Subquery predicates outside of the SELECT list must be evaluated in order + to possibly filter the special result row generated for implicit grouping + if the subquery is in the HAVING clause. + If the predicate is constant, we need its actual value in the only result + row for queries with implicit grouping. + */ + if (parsing_place != SELECT_LIST || const_item()) return; value= Item_cache::get_cache(new Item_null()); reset(); @@ -1367,7 +1381,14 @@ Item* Item_exists_subselect::expr_cache_insert_transformer(uchar *thd_arg) void Item_exists_subselect::no_rows_in_result() { - if (const_item()) + /* + Subquery predicates outside of the SELECT list must be evaluated in order + to possibly filter the special result row generated for implicit grouping + if the subquery is in the HAVING clause. + If the predicate is constant, we need its actual value in the only result + row for queries with implicit grouping. + */ + if (parsing_place != SELECT_LIST || const_item()) return; value= 0; null_value= 0; @@ -2713,7 +2734,14 @@ void Item_allany_subselect::print(String *str, enum_query_type query_type) void Item_allany_subselect::no_rows_in_result() { - if (const_item()) + /* + Subquery predicates outside of the SELECT list must be evaluated in order + to possibly filter the special result row generated for implicit grouping + if the subquery is in the HAVING clause. + If the predicate is constant, we need its actual value in the only result + row for queries with implicit grouping. + */ + if (parsing_place != SELECT_LIST || const_item()) return; value= 0; null_value= 0; -- cgit v1.2.1