diff options
author | unknown <igor@rurik.mysql.com> | 2005-10-22 19:35:37 -0700 |
---|---|---|
committer | unknown <igor@rurik.mysql.com> | 2005-10-22 19:35:37 -0700 |
commit | cd49e32407dcc88a86a26b4c8720afd9ef99b7dd (patch) | |
tree | ebe3a32ff819aa215d075626945538fd5178dbf5 /sql | |
parent | d29ecd5e91adf6eaeb4f2afef0db30648fe08065 (diff) | |
parent | 8a84dd7e7f9d699c84d2ed52fe6189f01e6d0ce6 (diff) | |
download | mariadb-git-cd49e32407dcc88a86a26b4c8720afd9ef99b7dd.tar.gz |
Merge rurik.mysql.com:/home/igor/mysql-5.0
into rurik.mysql.com:/home/igor/dev/mysql-5.0-2
sql/item.cc:
Auto merged
sql/item.h:
Auto merged
sql/item_cmpfunc.cc:
Auto merged
sql/item_func.cc:
Auto merged
sql/mysql_priv.h:
Auto merged
sql/sql_class.cc:
Auto merged
sql/sql_delete.cc:
Auto merged
sql/sql_lex.cc:
Auto merged
sql/sql_lex.h:
Auto merged
sql/sql_parse.cc:
Auto merged
sql/sql_update.cc:
Auto merged
sql/sql_yacc.yy:
Auto merged
Diffstat (limited to 'sql')
-rw-r--r-- | sql/item.cc | 52 | ||||
-rw-r--r-- | sql/item.h | 2 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 2 | ||||
-rw-r--r-- | sql/item_func.cc | 2 | ||||
-rw-r--r-- | sql/item_row.cc | 2 | ||||
-rw-r--r-- | sql/item_strfunc.cc | 2 | ||||
-rw-r--r-- | sql/item_subselect.cc | 4 | ||||
-rw-r--r-- | sql/item_sum.cc | 266 | ||||
-rw-r--r-- | sql/item_sum.h | 204 | ||||
-rw-r--r-- | sql/mysql_priv.h | 1 | ||||
-rw-r--r-- | sql/sql_base.cc | 6 | ||||
-rw-r--r-- | sql/sql_class.cc | 2 | ||||
-rw-r--r-- | sql/sql_class.h | 13 | ||||
-rw-r--r-- | sql/sql_delete.cc | 2 | ||||
-rw-r--r-- | sql/sql_lex.cc | 5 | ||||
-rw-r--r-- | sql/sql_lex.h | 14 | ||||
-rw-r--r-- | sql/sql_parse.cc | 2 | ||||
-rw-r--r-- | sql/sql_prepare.cc | 3 | ||||
-rw-r--r-- | sql/sql_select.cc | 30 | ||||
-rw-r--r-- | sql/sql_update.cc | 2 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 1 |
21 files changed, 544 insertions, 73 deletions
diff --git a/sql/item.cc b/sql/item.cc index b2aca750475..b0aa481abf0 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1047,6 +1047,7 @@ void Item_name_const::print(String *str) ref_pointer_array Pointer to array of reference fields fields All fields in select ref Pointer to item + skip_registered <=> the function must skipped for registered SUM items NOTES This is from split_sum_func2() for items that should be split @@ -1059,8 +1060,13 @@ void Item_name_const::print(String *str) void Item::split_sum_func2(THD *thd, Item **ref_pointer_array, - List<Item> &fields, Item **ref) + List<Item> &fields, Item **ref, + bool skip_registered) { + /* An item of type Item_sum is registered <=> ref_by != 0 */ + if (type() == SUM_FUNC_ITEM && skip_registered && + ((Item_sum *) this)->ref_by) + return; if (type() != SUM_FUNC_ITEM && with_sum_func) { /* Will split complicated items and ignore simple ones */ @@ -3192,14 +3198,8 @@ resolve_ref_in_select_and_group(THD *thd, Item_ident *ref, SELECT_LEX *select) { for each outer query Q_k beginning from the inner-most one { - if - Q_k is not a group query AND - - Q_k is not inside an aggregate function - OR - - Q_(k-1) is not in a HAVING or SELECT clause of Q_k - { - search for a column or derived column named col_ref_i - [in table T_j] in the FROM clause of Q_k; - } + search for a column or derived column named col_ref_i + [in table T_j] in the FROM clause of Q_k; if such a column is not found Search for a column or derived column named col_ref_i @@ -3279,18 +3279,11 @@ bool Item_field::fix_fields(THD *thd, Item **reference) place= prev_subselect_item->parsing_place; /* - Check table fields only if the subquery is used somewhere out of - HAVING, or the outer SELECT does not use grouping (i.e. tables are - accessible). - In case of a view, find_field_in_tables() writes the pointer to the found view field into '*reference', in other words, it substitutes this Item_field with the found expression. */ - if ((place != IN_HAVING || - (!select->with_sum_func && - select->group_list.elements == 0)) && - (from_field= find_field_in_tables(thd, this, + if ((from_field= find_field_in_tables(thd, this, outer_context-> first_name_resolution_table, outer_context-> @@ -3308,6 +3301,21 @@ bool Item_field::fix_fields(THD *thd, Item **reference) { prev_subselect_item->used_tables_cache|= from_field->table->map; prev_subselect_item->const_item_cache= 0; + if (thd->lex->in_sum_func && + thd->lex->in_sum_func->nest_level == + thd->lex->current_select->nest_level) + { + Item::Type type= (*reference)->type(); + set_if_bigger(thd->lex->in_sum_func->max_arg_level, + select->nest_level); + set_field(from_field); + fixed= 1; + mark_as_dependent(thd, last_checked_context->select_lex, + context->select_lex, this, + ((type == REF_ITEM || type == FIELD_ITEM) ? + (Item_ident*) (*reference) : 0)); + return FALSE; + } } else { @@ -3459,6 +3467,11 @@ bool Item_field::fix_fields(THD *thd, Item **reference) return FALSE; set_field(from_field); + if (thd->lex->in_sum_func && + thd->lex->in_sum_func->nest_level == + thd->lex->current_select->nest_level) + set_if_bigger(thd->lex->in_sum_func->max_arg_level, + thd->lex->current_select->nest_level); } else if (thd->set_query_id && field->query_id != thd->query_id) { @@ -4615,9 +4628,8 @@ bool Item_ref::fix_fields(THD *thd, Item **reference) aggregate function. */ if (((*ref)->with_sum_func && name && - (depended_from || - !(current_sel->linkage != GLOBAL_OPTIONS_TYPE && - current_sel->having_fix_field))) || + !(current_sel->linkage != GLOBAL_OPTIONS_TYPE && + current_sel->having_fix_field)) || !(*ref)->fixed) { my_error(ER_ILLEGAL_REFERENCE, MYF(0), diff --git a/sql/item.h b/sql/item.h index 3e52cbe5fd7..d29dcbf2d4f 100644 --- a/sql/item.h +++ b/sql/item.h @@ -599,7 +599,7 @@ public: List<Item> &fields) {} /* Called for items that really have to be split */ void split_sum_func2(THD *thd, Item **ref_pointer_array, List<Item> &fields, - Item **ref); + Item **ref, bool skip_registered); virtual bool get_date(TIME *ltime,uint fuzzydate); virtual bool get_time(TIME *ltime); virtual bool get_date_result(TIME *ltime,uint fuzzydate) diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 761d15c8a3e..c714929f33b 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -2666,7 +2666,7 @@ void Item_cond::split_sum_func(THD *thd, Item **ref_pointer_array, List_iterator<Item> li(list); Item *item; while ((item= li++)) - item->split_sum_func2(thd, ref_pointer_array, fields, li.ref()); + item->split_sum_func2(thd, ref_pointer_array, fields, li.ref(), TRUE); } diff --git a/sql/item_func.cc b/sql/item_func.cc index 35ed9b615f2..2a031c76d0e 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -280,7 +280,7 @@ void Item_func::split_sum_func(THD *thd, Item **ref_pointer_array, { Item **arg, **arg_end; for (arg= args, arg_end= args+arg_count; arg != arg_end ; arg++) - (*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg); + (*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg, TRUE); } diff --git a/sql/item_row.cc b/sql/item_row.cc index 9362518e6ef..75c3f8a2922 100644 --- a/sql/item_row.cc +++ b/sql/item_row.cc @@ -90,7 +90,7 @@ void Item_row::split_sum_func(THD *thd, Item **ref_pointer_array, { Item **arg, **arg_end; for (arg= items, arg_end= items+arg_count; arg != arg_end ; arg++) - (*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg); + (*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg, TRUE); } diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 1e8fe2e695f..772d441f2ea 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -1864,7 +1864,7 @@ String *Item_func_elt::val_str(String *str) void Item_func_make_set::split_sum_func(THD *thd, Item **ref_pointer_array, List<Item> &fields) { - item->split_sum_func2(thd, ref_pointer_array, fields, &item); + item->split_sum_func2(thd, ref_pointer_array, fields, &item, TRUE); Item_str_func::split_sum_func(thd, ref_pointer_array, fields); } diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 8afc885e59b..b78b7d64430 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -803,6 +803,7 @@ Item_in_subselect::single_value_transformer(JOIN *join, !(select_lex->next_select())) { Item_sum_hybrid *item; + nesting_map save_allow_sum_func; if (func->l_op()) { /* @@ -828,6 +829,8 @@ Item_in_subselect::single_value_transformer(JOIN *join, it.replace(item); } + save_allow_sum_func= thd->lex->allow_sum_func; + thd->lex->allow_sum_func|= 1 << thd->lex->current_select->nest_level; /* Item_sum_(max|min) can't substitute other item => we can use 0 as reference, also Item_sum_(max|min) can't be fixed after creation, so @@ -835,6 +838,7 @@ Item_in_subselect::single_value_transformer(JOIN *join, */ if (item->fix_fields(thd, 0)) DBUG_RETURN(RES_ERROR); + thd->lex->allow_sum_func= save_allow_sum_func; /* we added aggregate function => we have to change statistic */ count_field_types(&join->tmp_table_param, join->all_fields, 0); diff --git a/sql/item_sum.cc b/sql/item_sum.cc index b56d99cf245..85b2888712d 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -24,6 +24,234 @@ #include "mysql_priv.h" #include "sql_select.h" +/* + Prepare an aggregate function item for checking context conditions + + SYNOPSIS + init_sum_func_check() + thd reference to the thread context info + + DESCRIPTION + The function initializes the members of the Item_sum object created + for a set function that are used to check validity of the set function + occurrence. + If the set function is not allowed in any subquery where it occurs + an error is reported immediately. + + NOTES + This function is to be called for any item created for a set function + object when the traversal of trees built for expressions used in the query + is performed at the phase of context analysis. This function is to + be invoked at the descent of this traversal. + + RETURN + TRUE if an error is reported + FALSE otherwise +*/ + +bool Item_sum::init_sum_func_check(THD *thd) +{ + if (!thd->lex->allow_sum_func) + { + my_message(ER_INVALID_GROUP_FUNC_USE, ER(ER_INVALID_GROUP_FUNC_USE), + MYF(0)); + return TRUE; + } + /* Set a reference to the nesting set function if there is any */ + in_sum_func= thd->lex->in_sum_func; + /* Save a pointer to object to be used in items for nested set functions */ + thd->lex->in_sum_func= this; + nest_level= thd->lex->current_select->nest_level; + ref_by= 0; + aggr_level= -1; + max_arg_level= -1; + max_sum_func_level= -1; + return FALSE; +} + +/* + Check constraints imposed on a usage of a set function + + SYNOPSIS + check_sum_func() + thd reference to the thread context info + ref location of the pointer to this item in the embedding expression + + DESCRIPTION + The method verifies whether context conditions imposed on a usage + of any set function are met for this occurrence. + It checks whether the set function occurs in the position where it + can be aggregated and, when it happens to occur in argument of another + set function, the method checks that these two functions are aggregated in + different subqueries. + If the context conditions are not met the method reports an error. + If the set function is aggregated in some outer subquery the method + adds it to the chain of items for such set functions that is attached + to the the st_select_lex structure for this subquery. + + NOTES + This function is to be called for any item created for a set function + object when the traversal of trees built for expressions used in the query + is performed at the phase of context analysis. This function is to + be invoked at the ascent of this traversal. + + IMPLEMENTATION + A number of designated members of the object are used to check the + conditions. They are specified in the comment before the Item_sum + class declaration. + Additionally a bitmap variable called allow_sum_func is employed. + It is included into the thd->lex structure. + The bitmap contains 1 at n-th position if the set function happens + to occur under a construct of the n-th level subquery where usage + of set functions are allowed (i.e either in the SELECT list or + in the HAVING clause of the corresponding subquery) + Consider the query: + SELECT SUM(t1.b) FROM t1 GROUP BY t1.a + HAVING t1.a IN (SELECT t2.c FROM t2 WHERE AVG(t1.b) > 20) AND + t1.a > (SELECT MIN(t2.d) FROM t2); + allow_sum_func will contain: + for SUM(t1.b) - 1 at the first position + for AVG(t1.b) - 1 at the first position, 0 at the second position + for MIN(t2.d) - 1 at the first position, 1 at the second position. + + RETURN + TRUE if an error is reported + FALSE otherwise +*/ + +bool Item_sum::check_sum_func(THD *thd, Item **ref) +{ + bool invalid= FALSE; + nesting_map allow_sum_func= thd->lex->allow_sum_func; + /* + The value of max_arg_level is updated if an argument of the set function + contains a column reference resolved against a subquery whose level is + greater than the current value of max_arg_level. + max_arg_level cannot be greater than nest level. + nest level is always >= 0 + */ + if (nest_level == max_arg_level) + { + /* + The function must be aggregated in the current subquery, + If it is there under a construct where it is not allowed + we report an error. + */ + invalid= !(allow_sum_func & (1 << max_arg_level)); + } + else if (max_arg_level >= 0 || !(allow_sum_func & (1 << nest_level))) + { + /* + The set function can be aggregated only in outer subqueries. + Try to find a subquery where it can be aggregated; + If we fail to find such a subquery report an error. + */ + if (register_sum_func(thd, ref)) + return TRUE; + invalid= aggr_level < 0 && !(allow_sum_func & (1 << nest_level)); + } + if (!invalid && aggr_level < 0) + aggr_level= nest_level; + /* + By this moment we either found a subquery where the set function is + to be aggregated and assigned a value that is >= 0 to aggr_level, + or set the value of 'invalid' to TRUE to report later an error. + */ + /* + Additionally we have to check whether possible nested set functions + are acceptable here: they are not, if the level of aggregation of + some of them is less than aggr_level. + */ + invalid= aggr_level <= max_sum_func_level; + if (invalid) + { + my_message(ER_INVALID_GROUP_FUNC_USE, ER(ER_INVALID_GROUP_FUNC_USE), + MYF(0)); + return TRUE; + } + if (in_sum_func && in_sum_func->nest_level == nest_level) + { + /* + If the set function is nested adjust the value of + max_sum_func_level for the nesting set function. + */ + set_if_bigger(in_sum_func->max_sum_func_level, aggr_level); + } + thd->lex->in_sum_func= in_sum_func; + return FALSE; +} + +/* + Attach a set function to the subquery where it must be aggregated + + SYNOPSIS + register_sum_func() + thd reference to the thread context info + ref location of the pointer to this item in the embedding expression + + DESCRIPTION + The function looks for an outer subquery where the set function must be + aggregated. If it finds such a subquery then aggr_level is set to + the nest level of this subquery and the item for the set function + is added to the list of set functions used in nested subqueries + inner_sum_func_list defined for each subquery. When the item is placed + there the field 'ref_by' is set to ref. + + NOTES. + Now we 'register' only set functions that are aggregated in outer + subqueries. Actually it makes sense to link all set function for + a subquery in one chain. It would simplify the process of 'splitting' + for set functions. + + RETURN + FALSE if the executes without failures (currently always) + TRUE otherwise +*/ + +bool Item_sum::register_sum_func(THD *thd, Item **ref) +{ + SELECT_LEX *sl; + SELECT_LEX *aggr_sl= NULL; + nesting_map allow_sum_func= thd->lex->allow_sum_func; + for (sl= thd->lex->current_select->master_unit()->outer_select() ; + sl && sl->nest_level > max_arg_level; + sl= sl->master_unit()->outer_select() ) + { + if (aggr_level < 0 && (allow_sum_func & (1 << sl->nest_level))) + { + /* Found the most nested subquery where the function can be aggregated */ + aggr_level= sl->nest_level; + aggr_sl= sl; + } + } + if (sl && (allow_sum_func & (1 << sl->nest_level))) + { + /* + We reached the subquery of level max_arg_level and checked + that the function can be aggregated here. + The set function will be aggregated in this subquery. + */ + aggr_level= sl->nest_level; + aggr_sl= sl; + } + if (aggr_level >= 0) + { + ref_by= ref; + /* Add the object to the list of registered objects assigned to aggr_sl */ + if (!aggr_sl->inner_sum_func_list) + next= this; + else + { + next= aggr_sl->inner_sum_func_list->next; + aggr_sl->inner_sum_func_list->next= this; + } + aggr_sl->inner_sum_func_list= this; + + } + return FALSE; +} + + Item_sum::Item_sum(List<Item> &list) :arg_count(list.elements) { @@ -197,13 +425,9 @@ Item_sum_num::fix_fields(THD *thd, Item **ref) { DBUG_ASSERT(fixed == 0); - if (!thd->allow_sum_func) - { - my_message(ER_INVALID_GROUP_FUNC_USE, ER(ER_INVALID_GROUP_FUNC_USE), - MYF(0)); + if (init_sum_func_check(thd)) return TRUE; - } - thd->allow_sum_func=0; // No included group funcs + decimals=0; maybe_null=0; for (uint i=0 ; i < arg_count ; i++) @@ -217,7 +441,10 @@ Item_sum_num::fix_fields(THD *thd, Item **ref) max_length=float_length(decimals); null_value=1; fix_length_and_dec(); - thd->allow_sum_func=1; // Allow group functions + + if (check_sum_func(thd, ref)) + return TRUE; + fixed= 1; return FALSE; } @@ -258,13 +485,9 @@ Item_sum_hybrid::fix_fields(THD *thd, Item **ref) DBUG_ASSERT(fixed == 0); Item *item= args[0]; - if (!thd->allow_sum_func) - { - my_message(ER_INVALID_GROUP_FUNC_USE, ER(ER_INVALID_GROUP_FUNC_USE), - MYF(0)); + + if (init_sum_func_check(thd)) return TRUE; - } - thd->allow_sum_func=0; // No included group funcs // 'item' can be changed during fix_fields if (!item->fixed && @@ -300,11 +523,14 @@ Item_sum_hybrid::fix_fields(THD *thd, Item **ref) result_field=0; null_value=1; fix_length_and_dec(); - thd->allow_sum_func=1; // Allow group functions if (item->type() == Item::FIELD_ITEM) hybrid_field_type= ((Item_field*) item)->field->type(); else hybrid_field_type= Item::field_type(); + + if (check_sum_func(thd, ref)) + return TRUE; + fixed= 1; return FALSE; } @@ -2979,14 +3205,9 @@ Item_func_group_concat::fix_fields(THD *thd, Item **ref) uint i; /* for loop variable */ DBUG_ASSERT(fixed == 0); - if (!thd->allow_sum_func) - { - my_message(ER_INVALID_GROUP_FUNC_USE, ER(ER_INVALID_GROUP_FUNC_USE), - MYF(0)); + if (init_sum_func_check(thd)) return TRUE; - } - thd->allow_sum_func= 0; maybe_null= 1; /* @@ -3008,8 +3229,11 @@ Item_func_group_concat::fix_fields(THD *thd, Item **ref) result.set_charset(collation.collation); result_field= 0; null_value= 1; - thd->allow_sum_func= 1; max_length= thd->variables.group_concat_max_len; + + if (check_sum_func(thd, ref)) + return TRUE; + fixed= 1; return FALSE; } diff --git a/sql/item_sum.h b/sql/item_sum.h index 87cc248e5e4..6bc4af1b836 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -23,6 +23,200 @@ #include <my_tree.h> +/* + Class Item_sum is the base class used for special expressions that SQL calls + 'set functions'. These expressions are formed with the help of aggregate + functions such as SUM, MAX, GROUP_CONCAT etc. + + GENERAL NOTES + + A set function can be used not in any position where an expression is + accepted. There are some quite explicable restrictions for the usage of + set functions. + + In the query: + SELECT AVG(b) FROM t1 WHERE SUM(b) > 20 GROUP by a + the usage of the set function AVG(b) is legal, while the usage of SUM(b) + is illegal. A WHERE condition must contain expressions that can be + evaluated for each row of the table. Yet the expression SUM(b) can be + evaluated only for each group of rows with the same value of column a. + In the query: + SELECT AVG(b) FROM t1 WHERE c > 30 GROUP BY a HAVING SUM(b) > 20 + both set function expressions AVG(b) and SUM(b) are legal. + + We can say that in a query without nested selects an occurrence of a + set function in an expression of the SELECT list or/and in the HAVING + clause is legal, while in the WHERE clause it's illegal. + + The general rule to detect whether a set function is legal in a query with + nested subqueries is much more complicated. + + Consider the the following query: + SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a > ALL (SELECT t2.c FROM t2 WHERE SUM(t1.b) < t2.c). + The set function SUM(b) is used here in the WHERE clause of the subquery. + Nevertheless it is legal since it is under the HAVING clause of the query + to which this function relates. The expression SUM(t1.b) is evaluated + for each group defined in the main query, not for groups of the subquery. + + The problem of finding the query where to aggregate a particular + set function is not so simple is it seems to be. + + In the query: + SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a > ALL(SELECT t2.c FROM t2 GROUP BY t2.c + HAVING SUM(t1.a) < t2.c) + the set function can be evaluated for both outer and inner selects. + If we evaluate SUM(t.a) for the outer query then we get the value of t.a + multiplied by the cardinality of a group in table t1. In this case + in each correlated subquery SUM(t1.a) is used as a constant. But we also + can evaluate SUM(t.a) for the inner query. In this case t.a will be a + constant for each correlated subquery and summation is performed + for each group of table t2. + (Here it makes sense to remind that the query + SELECT c FROM t GROUP BY a HAVING SUM(1) < a + is quite legal in our SQL). + + So depending on to what query we assign the set function we + can get different result sets. + + The general rule to detect the query where a set function is to be + evaluated can be formulated as follows. + Consider a set function S(E) where E is an expression with occurrences + of column references C1, ..., CN. Resolve these references against + subqueries whose subexpression the set function S(E) is. Let Q be the + most inner subquery of those subqueries. (It should be noted here that S(E) + in no way can be evaluated in the subquery embedding the subquery Q.) + If S(E) is used in a construct of Q where set function are allowed then + we evaluate S(E) in Q. + Otherwise we look for a most inner subquery containing S(E) of those where + usage of S(E) is allowed. + + Let's demonstrate how this rule is applied to the following queries. + + 1. SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a > ALL(SELECT t2.b FROM t2 GROUP BY t2.b + HAVING t2.b > ALL(SELECT t3.c FROM t3 GROUP BY t3.c + HAVING SUM(t1.a+t2.b) < t3.c)) + For this query the set function SUM(t1.a+t2.b) depends on t1.a and t2.b + with t1.a defined in the most outer query, and t2.b defined for its + subquery. The set function is in the HAVING clause of the subquery and can + be evaluated in this subquery. + + 2. SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a > ALL(SELECT t2.b FROM t2 + WHERE t2.b > ALL (SELECT t3.c FROM t3 GROUP BY t3.c + HAVING SUM(t1.a+t2.b) < t3.c)) + Here the set function SUM(t1.a+t2.b)is in the WHERE clause of the second + subquery - the most upper subquery where t1.a and t2.b are defined. + If we evaluate the function in this subquery we violate the context rules. + So we evaluate the function in the third subquery where it is used under + the HAVING clause. + + 3. SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a > ALL(SELECT t2.b FROM t2 + WHERE t2.b > ALL (SELECT t3.c FROM t3 + WHERE SUM(t1.a+t2.b) < t3.c)) + In this query evaluation of SUM(t1.a+t2.b) is not legal neither in the second + nor in the third subqueries. So this query is invalid. + + Mostly set functions cannot be nested. In the query + SELECT t1.a from t1 GROUP BY t1.a HAVING AVG(SUM(t1.b)) > 20 + the expression SUM(b) is not acceptable, though it is under a HAVING clause. + It is acceptable in the query: + SELECT t.1 FROM t1 GROUP BY t1.a HAVING SUM(t1.b) > 20. + + An argument of a set function does not have to be a reference to a table + column as we saw it in examples above. This can be a more complex expression + SELECT t1.a FROM t1 GROUP BY t1.a HAVING SUM(t1.b+1) > 20. + The expression SUM(t1.b+1) has a very clear semantics in this context: + we sum up the values of t1.b+1 where t1.b varies for all values within a + group of rows that contain the same t1.a value. + + A set function for an outer query yields a constant within a subquery. So + the semantics of the query + SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c + HAVING AVG(t2.c+SUM(t1.b)) > 20) + is still clear too. For a group of the rows with the same t1.a values we + calculate the value of SUM(t1.b). This value 's' is substituted in the + the subquery: + SELECT t2.c FROM t2 GROUP BY t2.c HAVING AVG(t2.c+s) + than returns some result set. + + By the same reason the following query with a subquery + SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c + HAVING AVG(SUM(t1.b)) > 20) + is also acceptable. + + IMPLEMENTATION NOTES + + Three methods were added to the class to check the constraints specified + in the previous section. This methods utilize several new members. + + The field 'nest_level' contains the number of the level for the subquery + containing the set function. The main SELECT is of level 0, its subqueries + are of levels 1, the subqueries of the latters are of level 2 and so on. + + The field 'aggr_level' is to contain the nest level of the subquery + where the set function is aggregated. + + The field 'max_arg_level' is for the maximun of the nest levels of the + unbound column references occurred in the set function. A column reference + is unbound within a set function if it is not bound by any subquery + used as a subexpression in this function. A column reference is bound by + a subquery if it is a reference to the column by which the aggregation + of some set function that is used in the subquery is calculated. + For the set function used in the query + SELECT t1.a FROM t1 GROUP BY t1.a + HAVING t1.a > ALL(SELECT t2.b FROM t2 GROUP BY t2.b + HAVING t2.b > ALL(SELECT t3.c FROM t3 GROUP BY t3.c + HAVING SUM(t1.a+t2.b) < t3.c)) + the value of max_arg_level is equal to 1 since t1.a is bound in the main + query, and t2.b is bound by the first subquery whose nest level 1. + Obviously a set function cannot be aggregated in the subquery whose + nest level is less than max_arg_level. (Yet it can be aggregated in the + subqueries whose nest level is greater than max_arg_level.) + In the query + SELECT t.a FROM t1 HAVING AVG(t1.a+(SELECT MIN(t2.c) FROM t2)) + the value of the max_arg_level for the AVG set function is 0 since + the reference t2.c is bound in the subquery. + + The field 'max_sum_func_level' is to contain the maximum of the + nest levels of the set functions that are used as subexpressions of + the arguments of the given set function, but not aggregated in any + subquery within this set function. A nested set function s1 can be + used within set function s0 only if s1.max_sum_func_level < + s0.max_sum_func_level. Set function s1 is considered as nested + for set function s0 if s1 is not calculated in any subquery + within s0. + + A set function that as a subexpression in an argument of another set + function refers to the latter via the field 'in_sum_func'. + + The condition imposed on the usage of set functions are checked when + we traverse query subexpressions with the help of recursive method + fix_fields. When we apply this method to an object of the class + Item_sum, first, on the descent, we call the method init_sum_func_check + that initialize members used at checking. Then, on the ascent, we + call the method check_sum_func that validates the set function usage + and reports an error if it is illegal. + The method register_sum_func serves to link the items for the set functions + that are aggregated in the embedding (sub)queries. Circular chains of such + functions are attached to the corresponding st_select_lex structures + through the field inner_sum_func_list. + + Exploiting the fact that the members mentioned above are used in one + recursive function we could have allocated them on the thread stack. + Yet we don't do it now. + + We assume that the nesting level of subquries does not exceed 127. + TODO: to catch queries where the limit is exceeded to make the + code clean here. + +*/ + class Item_sum :public Item_result_field { public: @@ -33,7 +227,14 @@ public: }; Item **args, *tmp_args[2]; + Item **ref_by; /* pointer to a ref to the object used to register it */ + Item_sum *next; /* next in the circular chain of registered objects */ uint arg_count; + Item_sum *in_sum_func; /* embedding set function if any */ + int8 nest_level; /* number of the nesting level of the set function */ + int8 aggr_level; /* nesting level of the aggregating subquery */ + int8 max_arg_level; /* max level of unbound column references */ + int8 max_sum_func_level;/* max level of aggregation for embedded functions */ bool quick_group; /* If incremental update of fields */ void mark_as_sum_func(); @@ -111,6 +312,9 @@ public: virtual Field *create_tmp_field(bool group, TABLE *table, uint convert_blob_length); bool walk (Item_processor processor, byte *argument); + bool init_sum_func_check(THD *thd); + bool check_sum_func(THD *thd, Item **ref); + bool register_sum_func(THD *thd, Item **ref); }; diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index e33ac05e293..3dfcfd55464 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -44,6 +44,7 @@ typedef ulonglong table_map; /* Used for table bits in join */ typedef Bitmap<64> key_map; /* Used for finding keys */ typedef ulong key_part_map; /* Used for finding key parts */ +typedef ulong nesting_map; /* Used for flags of nesting constructs */ /* query_id */ typedef ulonglong query_id_t; diff --git a/sql/sql_base.cc b/sql/sql_base.cc index d4376a65594..990f496f956 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -4267,11 +4267,13 @@ bool setup_fields(THD *thd, Item **ref_pointer_array, { reg2 Item *item; bool save_set_query_id= thd->set_query_id; + nesting_map save_allow_sum_func= thd->lex->allow_sum_func; List_iterator<Item> it(fields); DBUG_ENTER("setup_fields"); thd->set_query_id=set_query_id; - thd->allow_sum_func= allow_sum_func; + if (allow_sum_func) + thd->lex->allow_sum_func|= 1 << thd->lex->current_select->nest_level; thd->where="field list"; /* @@ -4294,6 +4296,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array, if (!item->fixed && item->fix_fields(thd, it.ref()) || (item= *(it.ref()))->check_cols(1)) { + thd->lex->allow_sum_func= save_allow_sum_func; thd->set_query_id= save_set_query_id; DBUG_RETURN(TRUE); /* purecov: inspected */ } @@ -4304,6 +4307,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array, item->split_sum_func(thd, ref_pointer_array, *sum_func_list); thd->used_tables|= item->used_tables(); } + thd->lex->allow_sum_func= save_allow_sum_func; thd->set_query_id= save_set_query_id; DBUG_RETURN(test(thd->net.report_error)); } diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 609156ef5a8..0391585a02b 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1561,7 +1561,6 @@ Statement::Statement(enum enum_state state_arg, ulong id_arg, :Query_arena(&main_mem_root, state_arg), id(id_arg), set_query_id(1), - allow_sum_func(0), lex(&main_lex), query(0), query_length(0), @@ -1582,7 +1581,6 @@ void Statement::set_statement(Statement *stmt) { id= stmt->id; set_query_id= stmt->set_query_id; - allow_sum_func= stmt->allow_sum_func; lex= stmt->lex; query= stmt->query; query_length= stmt->query_length; diff --git a/sql/sql_class.h b/sql/sql_class.h index 2679143e9a5..253fe25f54d 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -785,19 +785,6 @@ public: field list can not contain duplicates. */ bool set_query_id; - /* - This variable is used in post-parse stage to declare that sum-functions, - or functions which have sense only if GROUP BY is present, are allowed. - For example in queries - SELECT MIN(i) FROM foo - SELECT GROUP_CONCAT(a, b, MIN(i)) FROM ... GROUP BY ... - MIN(i) have no sense. - Though it's grammar-related issue, it's hard to catch it out during the - parse stage because GROUP BY clause goes in the end of query. This - variable is mainly used in setup_fields/fix_fields. - See item_sum.cc for details. - */ - bool allow_sum_func; LEX_STRING name; /* name for named prepared statements */ LEX *lex; // parse tree descriptor diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 83f50ba3ac5..e0b9e2b9c37 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -331,7 +331,7 @@ bool mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds) SELECT_LEX *select_lex= &thd->lex->select_lex; DBUG_ENTER("mysql_prepare_delete"); - thd->allow_sum_func= 0; + thd->lex->allow_sum_func= 0; if (setup_tables(thd, &thd->lex->select_lex.context, &thd->lex->select_lex.top_join_list, table_list, conds, &select_lex->leaf_tables, diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 11f056d6510..14595ff7704 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -182,6 +182,9 @@ void lex_start(THD *thd, uchar *buf,uint length) lex->sroutines_list.empty(); lex->sroutines_list_own_last= lex->sroutines_list.next; lex->sroutines_list_own_elements= 0; + lex->nest_level=0 ; + lex->allow_sum_func= 0; + lex->in_sum_func= NULL; DBUG_VOID_RETURN; } @@ -1139,6 +1142,7 @@ void st_select_lex::init_query() first_cond_optimization= 1; parsing_place= NO_MATTER; exclude_from_table_unique_test= no_wrap_view_item= FALSE; + nest_level= 0; link_next= 0; } @@ -1158,6 +1162,7 @@ void st_select_lex::init_select() interval_list.empty(); use_index.empty(); ftfunc_list_alloc.empty(); + inner_sum_func_list= 0; ftfunc_list= &ftfunc_list_alloc; linkage= UNSPECIFIED_TYPE; order_list.elements= 0; diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 1e935c6dc2a..daec012304d 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -527,6 +527,8 @@ public: ulong table_join_options; uint in_sum_expr; uint select_number; /* number of select (used for EXPLAIN) */ + int nest_level; /* nesting level of select */ + Item_sum *inner_sum_func_list; /* list of sum func in nested selects */ uint with_wild; /* item list contain '*' */ bool braces; /* SELECT ... UNION (SELECT ... ) <- this braces */ /* TRUE when having fix field called in processing of this SELECT */ @@ -769,12 +771,23 @@ typedef struct st_lex SQL_LIST proc_list, auxilliary_table_list, save_list; create_field *last_field; + Item_sum *in_sum_func; udf_func udf; HA_CHECK_OPT check_opt; // check/repair options HA_CREATE_INFO create_info; LEX_MASTER_INFO mi; // used by CHANGE MASTER USER_RESOURCES mqh; ulong type; + /* + This variable is used in post-parse stage to declare that sum-functions, + or functions which have sense only if GROUP BY is present, are allowed. + For example in a query + SELECT ... FROM ...WHERE MIN(i) == 1 GROUP BY ... HAVING MIN(i) > 2 + MIN(i) in the WHERE clause is not allowed in the opposite to MIN(i) + in the HAVING clause. Due to possible nesting of select construct + the variable can contain 0 or 1 for each nest level. + */ + nesting_map allow_sum_func; enum_sql_command sql_command, orig_sql_command; thr_lock_type lock_option; enum SSL_type ssl_type; /* defined in violite.h */ @@ -793,6 +806,7 @@ typedef struct st_lex uint grant, grant_tot_col, which_columns; uint fk_delete_opt, fk_update_opt, fk_match_option; uint slave_thd_opt, start_transaction_opt; + int nest_level; /* In LEX representing update which were transformed to multi-update stores total number of tables. For LEX representing multi-delete diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index d0585dd1a65..06fe9e2398f 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -5362,6 +5362,8 @@ mysql_new_select(LEX *lex, bool move_down) select_lex->parent_lex= lex; /* Used in init_query. */ select_lex->init_query(); select_lex->init_select(); + lex->nest_level++; + select_lex->nest_level= lex->nest_level; /* Don't evaluate this subquery during statement prepare even if it's a constant one. The flag is switched off in the end of diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index 5f3539ca1e9..e32fa3f962a 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -2125,7 +2125,8 @@ void reinit_stmt_before_use(THD *thd, LEX *lex) lex->result->cleanup(); lex->result->set_thd(thd); } - thd->allow_sum_func= 0; + lex->allow_sum_func= 0; + lex->in_sum_func= NULL; DBUG_VOID_RETURN; } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 806a6d3ea32..7994171d28e 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -270,21 +270,20 @@ inline int setup_without_group(THD *thd, Item **ref_pointer_array, ORDER *order, ORDER *group, bool *hidden_group_fields) { - bool save_allow_sum_func; int res; + nesting_map save_allow_sum_func=thd->lex->allow_sum_func ; DBUG_ENTER("setup_without_group"); - save_allow_sum_func= thd->allow_sum_func; - thd->allow_sum_func= 0; + thd->lex->allow_sum_func&= ~(1 << thd->lex->current_select->nest_level); res= setup_conds(thd, tables, leaves, conds); - thd->allow_sum_func= save_allow_sum_func; + thd->lex->allow_sum_func|= 1 << thd->lex->current_select->nest_level; res= res || setup_order(thd, ref_pointer_array, tables, fields, all_fields, order); - thd->allow_sum_func= 0; + thd->lex->allow_sum_func&= ~(1 << thd->lex->current_select->nest_level); res= res || setup_group(thd, ref_pointer_array, tables, fields, all_fields, group, hidden_group_fields); - thd->allow_sum_func= save_allow_sum_func; + thd->lex->allow_sum_func= save_allow_sum_func; DBUG_RETURN(res); } @@ -351,8 +350,9 @@ JOIN::prepare(Item ***rref_pointer_array, if (having) { + nesting_map save_allow_sum_func= thd->lex->allow_sum_func; thd->where="having clause"; - thd->allow_sum_func=1; + thd->lex->allow_sum_func|= 1 << select_lex_arg->nest_level; select_lex->having_fix_field= 1; bool having_fix_rc= (!having->fixed && (having->fix_fields(thd, &having) || @@ -362,6 +362,18 @@ JOIN::prepare(Item ***rref_pointer_array, DBUG_RETURN(-1); /* purecov: inspected */ if (having->with_sum_func) having->split_sum_func(thd, ref_pointer_array, all_fields); + thd->lex->allow_sum_func= save_allow_sum_func; + } + if (select_lex->inner_sum_func_list) + { + Item_sum *end=select_lex->inner_sum_func_list; + Item_sum *item_sum= end; + do + { + item_sum= item_sum->next; + item_sum->split_sum_func2(thd, ref_pointer_array, + all_fields, item_sum->ref_by, FALSE); + } while (item_sum != end); } if (!thd->lex->view_prepare_mode) @@ -5165,7 +5177,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) join->const_table_map|=RAND_TABLE_BIT; { // Check const tables COND *const_cond= - make_cond_for_table(cond,join->const_table_map,(table_map) 0); + make_cond_for_table(cond, + join->const_table_map, + (table_map) 0); DBUG_EXECUTE("where",print_where(const_cond,"constants");); for (JOIN_TAB *tab= join->join_tab+join->const_tables; tab < join->join_tab+join->tables ; tab++) diff --git a/sql/sql_update.cc b/sql/sql_update.cc index a8e21177338..24a080e0675 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -589,7 +589,7 @@ bool mysql_prepare_update(THD *thd, TABLE_LIST *table_list, bzero((char*) &tables,sizeof(tables)); // For ORDER BY tables.table= table; tables.alias= table_list->alias; - thd->allow_sum_func= 0; + thd->lex->allow_sum_func= 0; if (setup_tables(thd, &select_lex->context, &select_lex->top_join_list, table_list, conds, &select_lex->leaf_tables, diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 94ce04fb5b1..bad2dabb3f2 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -8931,6 +8931,7 @@ subselect_end: LEX *lex=Lex; lex->pop_context(); lex->current_select = lex->current_select->return_after_parsing(); + lex->nest_level--; }; opt_view_list: |