diff options
author | Gleb Shchepa <gshchepa@mysql.com> | 2010-02-06 23:54:30 +0400 |
---|---|---|
committer | Gleb Shchepa <gshchepa@mysql.com> | 2010-02-06 23:54:30 +0400 |
commit | 994c0f83083ceee30430f28b8f1ced9b71944dbb (patch) | |
tree | bf8b15b6dfd85b7aed65d198e3d7eda5626b83d8 | |
parent | 3ad5d21ebc981a0b8c59597f0f1a306f3c9d6bf2 (diff) | |
download | mariadb-git-994c0f83083ceee30430f28b8f1ced9b71944dbb.tar.gz |
Bug #45640: optimizer bug produces wrong results
Grouping by a subquery in a query with a distinct aggregate
function lead to a wrong result (wrong and unordered
grouping values).
There are two related problems:
1) The query like this:
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) c
FROM t1 GROUP BY aa
returned wrong result, because the outer reference "t1.a"
in the subquery was substituted with the Item_ref item.
The Item_ref item obtains data from the result_field object
that refreshes once after the end of each group. This data
is not applicable to filesort since filesort() doesn't care
about groups (and doesn't update result_field objects with
copy_fields() and so on). Also that data is not applicable
to group separation algorithm: end_send_group() checks every
record with test_if_group_changed() that evaluates Item_ref
items, but it refreshes those Item_ref-s only after the end
of group, that is a vicious circle and the grouped column
values in the output are shifted.
Fix: if
a) we grouping by a subquery and
b) that subquery has outer references to FROM list
of the grouping query,
then we substitute these outer references with
Item_direct_ref like references under aggregate
functions: Item_direct_ref obtains data directly
from the current record.
2) The query with a non-trivial grouping expression like:
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) c
FROM t1 GROUP BY aa+0
also returned wrong result, since JOIN::exec() substitutes
references to top-level aliases in SELECT list with Item_copy
caching items. Item_copy items have same refreshing policy
as Item_ref items, so the whole groping expression with
Item_copy inside returns wrong result in filesort() and
end_send_group().
Fix: include aliased items into GROUP BY item tree instead
of Item_ref references to them.
mysql-test/r/group_by.result:
Test case for bug #45640
mysql-test/t/group_by.test:
Test case for bug #45640
sql/item.cc:
Bug #45640: optimizer bug produces wrong results
Item_field::fix_fields() has been modified to resolve
aliases in GROUP BY item trees into aliased items instead
of Item_ref items.
sql/item.h:
Bug #45640: optimizer bug produces wrong results
- Item::find_item_processor() has been introduced.
- Item_ref::walk() has been modified to apply processors
to itself too (not only to referenced item).
sql/mysql_priv.h:
Bug #45640: optimizer bug produces wrong results
fix_inner_refs() has been modified to accept group_list
parameter.
sql/sql_lex.cc:
Bug #45640: optimizer bug produces wrong results
Initialization of st_select_lex::group_fix_field has
been added.
sql/sql_lex.h:
Bug #45640: optimizer bug produces wrong results
The st_select_lex::group_fix_field field has been introduced
to control alias resolution in Itef_fied::fix_fields.
sql/sql_select.cc:
Bug #45640: optimizer bug produces wrong results
- The fix_inner_refs function has been modified to treat
subquery outer references like outer fields under aggregate
functions, if they are included in GROUP BY item tree.
- The find_order_in_list function has been modified to
fix Item_field alias fields included in the GROUP BY item
trees in a special manner.
-rw-r--r-- | mysql-test/r/group_by.result | 88 | ||||
-rw-r--r-- | mysql-test/t/group_by.test | 50 | ||||
-rw-r--r-- | sql/item.cc | 28 | ||||
-rw-r--r-- | sql/item.h | 6 | ||||
-rw-r--r-- | sql/mysql_priv.h | 2 | ||||
-rw-r--r-- | sql/sql_lex.cc | 1 | ||||
-rw-r--r-- | sql/sql_lex.h | 2 | ||||
-rw-r--r-- | sql/sql_select.cc | 46 |
8 files changed, 211 insertions, 12 deletions
diff --git a/mysql-test/r/group_by.result b/mysql-test/r/group_by.result index 742d4b90807..6ca08df40ea 100644 --- a/mysql-test/r/group_by.result +++ b/mysql-test/r/group_by.result @@ -1703,3 +1703,91 @@ COUNT(i) 1 DROP TABLE t1; SET @@sql_mode = @old_sql_mode; +# +# Bug #45640: optimizer bug produces wrong results +# +CREATE TABLE t1 (a INT, b INT); +INSERT INTO t1 VALUES (4, 40), (1, 10), (2, 20), (2, 20), (3, 30); +# should return 4 ordered records: +SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa; +aa COUNT(DISTINCT b) +1 1 +2 1 +3 1 +4 1 +SELECT (SELECT (SELECT t1.a)) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa; +aa COUNT(DISTINCT b) +1 1 +2 1 +3 1 +4 1 +SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa+0; +aa COUNT(DISTINCT b) +1 1 +2 1 +3 1 +4 1 +# should return the same result in a reverse order: +SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY -aa; +aa COUNT(DISTINCT b) +4 1 +3 1 +2 1 +1 1 +# execution plan should not use temporary table: +EXPLAIN EXTENDED +SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa+0; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 5 100.00 Using filesort +2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL NULL No tables used +Warnings: +Note 1276 Field or reference 'test.t1.a' of SELECT #2 was resolved in SELECT #1 +Note 1003 select (select `test`.`t1`.`a` AS `a`) AS `aa`,count(distinct `test`.`t1`.`b`) AS `COUNT(DISTINCT b)` from `test`.`t1` group by ((select `test`.`t1`.`a` AS `a`) + 0) +EXPLAIN EXTENDED +SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY -aa; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 5 100.00 Using filesort +2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL NULL No tables used +Warnings: +Note 1276 Field or reference 'test.t1.a' of SELECT #2 was resolved in SELECT #1 +Note 1003 select (select `test`.`t1`.`a` AS `a`) AS `aa`,count(distinct `test`.`t1`.`b`) AS `COUNT(DISTINCT b)` from `test`.`t1` group by -((select `test`.`t1`.`a` AS `a`)) +# should return only one record +SELECT (SELECT tt.a FROM t1 tt LIMIT 1) aa, COUNT(DISTINCT b) FROM t1 +GROUP BY aa; +aa COUNT(DISTINCT b) +4 4 +CREATE TABLE t2 SELECT DISTINCT a FROM t1; +# originally reported queries (1st two columns of next two query +# results should be same): +SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT(DISTINCT b) +FROM t1 GROUP BY aa, b; +aa b COUNT(DISTINCT b) +1 10 1 +2 20 1 +3 30 1 +4 40 1 +SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT( b) +FROM t1 GROUP BY aa, b; +aa b COUNT( b) +1 10 1 +2 20 2 +3 30 1 +4 40 1 +# ORDER BY for sure: +SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT(DISTINCT b) +FROM t1 GROUP BY aa, b ORDER BY -aa, -b; +aa b COUNT(DISTINCT b) +4 40 1 +3 30 1 +2 20 1 +1 10 1 +SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT( b) +FROM t1 GROUP BY aa, b ORDER BY -aa, -b; +aa b COUNT( b) +4 40 1 +3 30 1 +2 20 2 +1 10 1 +DROP TABLE t1, t2; +# +# End of 5.1 tests diff --git a/mysql-test/t/group_by.test b/mysql-test/t/group_by.test index 5b96213034a..e6ea5ecc7f6 100644 --- a/mysql-test/t/group_by.test +++ b/mysql-test/t/group_by.test @@ -1158,3 +1158,53 @@ SELECT COUNT(i) FROM t1 WHERE i > 1; DROP TABLE t1; SET @@sql_mode = @old_sql_mode; +--echo # +--echo # Bug #45640: optimizer bug produces wrong results +--echo # + +CREATE TABLE t1 (a INT, b INT); +INSERT INTO t1 VALUES (4, 40), (1, 10), (2, 20), (2, 20), (3, 30); + +--echo # should return 4 ordered records: +SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa; + +SELECT (SELECT (SELECT t1.a)) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa; + +SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa+0; + +--echo # should return the same result in a reverse order: +SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY -aa; + +--echo # execution plan should not use temporary table: +EXPLAIN EXTENDED +SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa+0; + +EXPLAIN EXTENDED +SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY -aa; + +--echo # should return only one record +SELECT (SELECT tt.a FROM t1 tt LIMIT 1) aa, COUNT(DISTINCT b) FROM t1 + GROUP BY aa; + +CREATE TABLE t2 SELECT DISTINCT a FROM t1; + +--echo # originally reported queries (1st two columns of next two query +--echo # results should be same): + +SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT(DISTINCT b) + FROM t1 GROUP BY aa, b; +SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT( b) + FROM t1 GROUP BY aa, b; + +--echo # ORDER BY for sure: + +SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT(DISTINCT b) + FROM t1 GROUP BY aa, b ORDER BY -aa, -b; +SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT( b) + FROM t1 GROUP BY aa, b ORDER BY -aa, -b; + +DROP TABLE t1, t2; + +--echo # + +--echo # End of 5.1 tests diff --git a/sql/item.cc b/sql/item.cc index 68c10c32b50..c967a1a0feb 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -4300,17 +4300,33 @@ bool Item_field::fix_fields(THD *thd, Item **reference) It's not an Item_field in the select list so we must make a new Item_ref to point to the Item in the select list and replace the Item_field created by the parser with the new Item_ref. + + NOTE: If we are fixing an alias reference inside ORDER/GROUP BY + item tree, then we use new Item_ref as an intermediate value + to resolve referenced item only. + In this case the new Item_ref item is unused. */ Item_ref *rf= new Item_ref(context, db_name,table_name,field_name); if (!rf) return 1; - thd->change_item_tree(reference, rf); + + bool save_group_fix_field= thd->lex->current_select->group_fix_field; /* - Because Item_ref never substitutes itself with other items - in Item_ref::fix_fields(), we can safely use the original - pointer to it even after fix_fields() - */ - return rf->fix_fields(thd, reference) || rf->check_cols(1); + No need for recursive resolving of aliases. + */ + thd->lex->current_select->group_fix_field= 0; + + bool ret= rf->fix_fields(thd, (Item **) &rf) || rf->check_cols(1); + thd->lex->current_select->group_fix_field= save_group_fix_field; + if (ret) + return TRUE; + + if (save_group_fix_field && alias_name_used) + thd->change_item_tree(reference, *rf->ref); + else + thd->change_item_tree(reference, rf); + + return FALSE; } } } diff --git a/sql/item.h b/sql/item.h index 88e90924fcc..d2e8382023b 100644 --- a/sql/item.h +++ b/sql/item.h @@ -901,6 +901,7 @@ public: virtual bool change_context_processor(uchar *context) { return 0; } virtual bool reset_query_id_processor(uchar *query_id_arg) { return 0; } virtual bool is_expensive_processor(uchar *arg) { return 0; } + virtual bool find_item_processor(uchar *arg) { return this == (void *) arg; } virtual bool register_field_in_read_map(uchar *arg) { return 0; } /* Check if a partition function is allowed @@ -2275,7 +2276,10 @@ public: return ref ? (*ref)->real_item() : this; } bool walk(Item_processor processor, bool walk_subquery, uchar *arg) - { return (*ref)->walk(processor, walk_subquery, arg); } + { + return (*ref)->walk(processor, walk_subquery, arg) || + (this->*processor)(arg); + } virtual void print(String *str, enum_query_type query_type); bool result_as_longlong() { diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index 34f29e7c458..1b775e658f1 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -1192,7 +1192,7 @@ int setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, List<Item> &fields, List<Item> &all_fields, ORDER *order, bool *hidden_group_fields); bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, - Item **ref_pointer_array); + Item **ref_pointer_array, ORDER *group_list= NULL); bool handle_select(THD *thd, LEX *lex, select_result *result, ulong setup_tables_done_option); diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 2adbc44eb12..5097ca2ad5b 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -1592,6 +1592,7 @@ void st_select_lex::init_query() having= prep_having= where= prep_where= 0; olap= UNSPECIFIED_OLAP_TYPE; having_fix_field= 0; + group_fix_field= 0; context.select_lex= this; context.init(); /* diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 4e4794ef2cf..459878c03fc 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -647,6 +647,8 @@ public: bool braces; /* SELECT ... UNION (SELECT ... ) <- this braces */ /* TRUE when having fix field called in processing of this SELECT */ bool having_fix_field; + /* TRUE when GROUP BY fix field called in processing of this SELECT */ + bool group_fix_field; /* List of references to fields referenced from inner selects */ List<Item_outer_ref> inner_refs_list; /* Number of Item_sum-derived objects in this SELECT */ diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 8744f77d6b1..ec880f3703e 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -287,6 +287,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result, all_fields List of all fields used in select select Current select ref_pointer_array Array of references to Items used in current select + group_list GROUP BY list (is NULL by default) DESCRIPTION The function serves 3 purposes - adds fields referenced from inner @@ -305,6 +306,8 @@ bool handle_select(THD *thd, LEX *lex, select_result *result, function is aggregated in the select where the outer field was resolved or in some more inner select then the Item_direct_ref class should be used. + Also it should be used if we are grouping by a subquery containing + the outer field. The resolution is done here and not at the fix_fields() stage as it can be done only after sum functions are fixed and pulled up to selects where they are have to be aggregated. @@ -321,7 +324,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result, bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, - Item **ref_pointer_array) + Item **ref_pointer_array, ORDER *group_list) { Item_outer_ref *ref; bool res= FALSE; @@ -371,6 +374,22 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, } } } + else + { + /* + Check if GROUP BY item trees contain the outer ref: + in this case we have to use Item_direct_ref instead of Item_ref. + */ + for (ORDER *group= group_list; group; group= group->next) + { + if ((*group->item)->walk(&Item::find_item_processor, TRUE, + (uchar *) ref)) + { + direct_ref= TRUE; + break; + } + } + } new_ref= direct_ref ? new Item_direct_ref(ref->context, item_ref, ref->table_name, ref->field_name, ref->alias_name_used) : @@ -577,7 +596,8 @@ JOIN::prepare(Item ***rref_pointer_array, } if (select_lex->inner_refs_list.elements && - fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array)) + fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array, + group_list)) DBUG_RETURN(-1); if (group_list) @@ -14547,11 +14567,29 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, We check order_item->fixed because Item_func_group_concat can put arguments for which fix_fields already was called. + + group_fix_field= TRUE is to resolve aliases from the SELECT list + without creating of Item_ref-s: JOIN::exec() wraps aliased items + in SELECT list with Item_copy items. To re-evaluate such a tree + that includes Item_copy items we have to refresh Item_copy caches, + but: + - filesort() never refresh Item_copy items, + - end_send_group() checks every record for group boundary by the + test_if_group_changed function that obtain data from these + Item_copy items, but the copy_fields function that + refreshes Item copy items is called after group boundaries only - + that is a vicious circle. + So we prevent inclusion of Item_copy items. */ - if (!order_item->fixed && + bool save_group_fix_field= thd->lex->current_select->group_fix_field; + if (is_group_field) + thd->lex->current_select->group_fix_field= TRUE; + bool ret= (!order_item->fixed && (order_item->fix_fields(thd, order->item) || (order_item= *order->item)->check_cols(1) || - thd->is_fatal_error)) + thd->is_fatal_error)); + thd->lex->current_select->group_fix_field= save_group_fix_field; + if (ret) return TRUE; /* Wrong field. */ uint el= all_fields.elements; |