diff options
author | Guilhem Bichot <guilhem.bichot@oracle.com> | 2013-12-04 12:32:42 +0100 |
---|---|---|
committer | Guilhem Bichot <guilhem.bichot@oracle.com> | 2013-12-04 12:32:42 +0100 |
commit | c90cdf5d494e0caef9d694c75c739c188df1a512 (patch) | |
tree | 9c322884f5fcb938bfe5d324c6fdc54ecae53173 /sql/item_sum.h | |
parent | 494d0247b7defe2828f29a979c6695dab9a2925f (diff) | |
download | mariadb-git-c90cdf5d494e0caef9d694c75c739c188df1a512.tar.gz |
Bug#16539979 - BASIC SELECT COUNT(DISTINCT ID) IS BROKEN
Bug#17867117 - ERROR RESULT WHEN "COUNT + DISTINCT + CASE WHEN" NEED MERGE_WALK
Problem:
COUNT DISTINCT gives incorrect result when it uses a Unique
Tree and its last inserted record has null value.
Here is how COUNT DISTINCT is processed, given that this query is not
using loose index scan.
When a row is produced as a result of joining tables (there is only
one table here), we store the SELECTed value in a Unique tree. This
allows elimination of any duplicates, and thus implements DISTINCT.
When we have processed all rows like this, we walk the Unique tree,
counting its elements, in Aggregator_distinct::endup() (tree->walk());
for each element we call Item_sum_count::add(). Such function wants to
ignore any NULL value, for that it checks item_sum -> args[0] ->
null_value. It is a mistake: when walking the Unique tree, the value
to be aggregated is not item_sum ->args[0] but rather table ->
field[0].
Solution:
instead of item_sum -> args[0] -> null_value, use arg_is_null(), which
knows where to look (like in fix for bug 57932).
As a consequence of this solution, we have to make arg_is_null() a
little more general:
1) Because it was so far only used for AVG() (which always has a
single argument), this function was looking at a single argument; now
that it has to work with COUNT(DISTINCT expression1,expression2), it
must look at all arguments.
2) Because we start using arg_is_null () for COUNT(DISTINCT), i.e. in
Item_sum_count::add (), it implies that we are also using it for
COUNT(no DISTINCT) (same add ()). For COUNT(no DISTINCT), the
nullness to check is that of item_sum -> args[0]. But the null_value
of such item is reliable only if val_*() has been called on it. So far
arg_is_null() was always used after a call to arg_val*(), so could
rely on null_value; but for COUNT, there is no call to arg_val*(), so
arg_is_null() has to call is_null() instead.
Testcase for 16539979 by Neeraj. Testcase for 17867117 contributed by
Xiaobin Lin from Taobao.
Diffstat (limited to 'sql/item_sum.h')
-rw-r--r-- | sql/item_sum.h | 43 |
1 files changed, 24 insertions, 19 deletions
diff --git a/sql/item_sum.h b/sql/item_sum.h index edcdb5a4d1e..3a670cccee5 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -58,19 +58,8 @@ protected: /* the aggregate function class to act on */ Item_sum *item_sum; - /** - When feeding back the data in endup() from Unique/temp table back to - Item_sum::add() methods we must read the data from Unique (and not - recalculate the functions that are given as arguments to the aggregate - function. - This flag is to tell the add() methods to take the data from the Unique - instead by calling the relevant val_..() method - */ - - bool use_distinct_values; - public: - Aggregator (Item_sum *arg): item_sum(arg), use_distinct_values(FALSE) {} + Aggregator (Item_sum *arg): item_sum(arg) {} virtual ~Aggregator () {} /* Keep gcc happy */ enum Aggregator_type { SIMPLE_AGGREGATOR, DISTINCT_AGGREGATOR }; @@ -107,10 +96,16 @@ public: /** Floating point value of being-aggregated argument */ virtual double arg_val_real() = 0; /** - NULLness of being-aggregated argument; can be called only after - arg_val_decimal() or arg_val_real(). + NULLness of being-aggregated argument. + + @param use_null_value Optimization: to determine if the argument is NULL + we must, in the general case, call is_null() on it, which itself might + call val_*() on it, which might be costly. If you just have called + arg_val*(), you can pass use_null_value=true; this way, arg_is_null() + might avoid is_null() and instead do a cheap read of the Item's null_value + (updated by arg_val*()). */ - virtual bool arg_is_null() = 0; + virtual bool arg_is_null(bool use_null_value) = 0; }; @@ -480,7 +475,7 @@ public: Item *get_arg(uint i) { return args[i]; } Item *set_arg(uint i, THD *thd, Item *new_val); - uint get_arg_count() { return arg_count; } + uint get_arg_count() const { return arg_count; } /* Initialization of distinct related members */ void init_aggregator() @@ -607,10 +602,20 @@ class Aggregator_distinct : public Aggregator */ bool always_null; + /** + When feeding back the data in endup() from Unique/temp table back to + Item_sum::add() methods we must read the data from Unique (and not + recalculate the functions that are given as arguments to the aggregate + function. + This flag is to tell the arg_*() methods to take the data from the Unique + instead of calling the relevant val_..() method. + */ + bool use_distinct_values; + public: Aggregator_distinct (Item_sum *sum) : Aggregator(sum), table(NULL), tmp_table_param(NULL), tree(NULL), - always_null(FALSE) {} + always_null(false), use_distinct_values(false) {} virtual ~Aggregator_distinct (); Aggregator_type Aggrtype() { return DISTINCT_AGGREGATOR; } @@ -620,7 +625,7 @@ public: void endup(); virtual my_decimal *arg_val_decimal(my_decimal * value); virtual double arg_val_real(); - virtual bool arg_is_null(); + virtual bool arg_is_null(bool use_null_value); bool unique_walk_function(void *element); static int composite_key_cmp(void* arg, uchar* key1, uchar* key2); @@ -646,7 +651,7 @@ public: void endup() {}; virtual my_decimal *arg_val_decimal(my_decimal * value); virtual double arg_val_real(); - virtual bool arg_is_null(); + virtual bool arg_is_null(bool use_null_value); }; |