Bug#12328597 - MULTIPLE COUNT(DISTINCT) IN SAME SELECT FALSE

WITH COMPOSITE KEY COLUMNS Problem:- While running a SELECT query with several AGGR(DISTINCT) function and these are referring to different field of same composite key, Returned incorrect value. Analysis:- In a table, where we have composite key like (a,b,c) and when we give a query like select COUNT(DISTINCT b), SUM(DISTINCT a) from .... here, we first make a list of items in Aggr(distinct) function (which is a, b), where order of item doesn't matter. and then we see, whether we have a composite key where the prefix of index columns matches the items of the aggregation function. (in this case we have a,b,c). if yes, so we can use loose index scan and we need not perform duplicate removal to distinct in our aggregate function. In our table, we traverse column marked with <-- and get the result as (a,b,c) count(distinct b) sum(distinct a) treated as count b treated as sum(a) (1,1,2)<-- 1 1 (1,2,2)<-- 1++=2 1+1=2 (1,2,3) (2,1,2)<-- 2++=3 1+1+2=4 (2,2,2)<-- 3++=4 1+1+2+2=6 (2,2,3) result will be 4,6, but it should be (2,3) As in this case, our assumption is incorrect. If we have query like select count(distinct a,b), sum(distinct a,b)from .. then we can use loose index scan Solution:- In our query, when we have more then one aggr(distinct) function then they should refer to same fields like select count(distinct a,b), sum(distinct a,b) from .. -->we can use loose scan index as both aggr(distinct) refer to same fields a,b. If they are referring to different field like select count(distinct a), sum(distinct b) from .. -->will not use loose scan index as both aggr(distinct) refer to different fields.
author: Neeraj Bisht <neeraj.x.bisht@oracle.com> 2013-05-13 17:15:25 +0530
committer: Neeraj Bisht <neeraj.x.bisht@oracle.com> 2013-05-13 17:15:25 +0530
commit: 2812634b6c45b96bf3a0def5a6df4267304e8dca (patch)
tree: 78c6d49a03f1f8f7216fcef2c311d2f47751503b /sql
parent: 575559754d957e1119d48dc5cec7d7b94720aa61 (diff)
download: mariadb-git-2812634b6c45b96bf3a0def5a6df4267304e8dca.tar.gz
3 files changed, 35 insertions, 6 deletions
diff --git a/sql/sql_bitmap.h b/sql/sql_bitmap.h
index f08a6a07bce..449fde37bee 100644
--- a/sql/sql_bitmap.h
+++ b/sql/sql_bitmap.h
@@ -71,6 +71,7 @@ public:
   my_bool is_subset(const Bitmap& map2) const { return bitmap_is_subset(&map, &map2.map); }
   my_bool is_overlapping(const Bitmap& map2) const { return bitmap_is_overlapping(&map, &map2.map); }
   my_bool operator==(const Bitmap& map2) const { return bitmap_cmp(&map, &map2.map); }
+  my_bool operator!=(const Bitmap& map2) const { return !(*this == map2); }
   char *print(char *buf) const
   {
     char *s=buf;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 0cb6981bfd8..fbb5d00e7a8 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -4204,8 +4204,23 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
   Optionally (if out_args is supplied) will push the arguments of 
   AGGFN(DISTINCT) to the list
 
+  Check for every COUNT(DISTINCT), AVG(DISTINCT) or
+  SUM(DISTINCT). These can be resolved by Loose Index Scan as long
+  as all the aggregate distinct functions refer to the same
+  fields. Thus:
+
+  SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS
+  SELECT AGGFN(DISTINCT a),    AGGFN(DISTINCT a)   ... => can use LIS
+  SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a)   ... => cannot use LIS
+  SELECT AGGFN(DISTINCT a),    AGGFN(DISTINCT b)   ... => cannot use LIS
+  etc.
+
   @param      join       the join to check
-  @param[out] out_args   list of aggregate function arguments
+  @param[out] out_args   Collect the arguments of the aggregate functions
+                         to a list. We don't worry about duplicates as
+                         these will be sorted out later in
+                         get_best_group_min_max.
+
   @return                does the query qualify for indexed AGGFN(DISTINCT)
     @retval   true       it does
     @retval   false      AGGFN(DISTINCT) must apply distinct in it.
@@ -4216,6 +4231,7 @@ is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
 {
   Item_sum **sum_item_ptr;
   bool result= false;
+  Field_map first_aggdistinct_fields;
 
   if (join->tables != 1 ||                    /* reference more than 1 table */
       join->select_distinct ||                /* or a DISTINCT */
@@ -4228,6 +4244,7 @@ is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
   for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
   {
     Item_sum *sum_item= *sum_item_ptr;
+    Field_map cur_aggdistinct_fields;
     Item *expr;
     /* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
     switch (sum_item->sum_func())
@@ -4257,15 +4274,23 @@ is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
       if (expr->real_item()->type() != Item::FIELD_ITEM)
         return false;
 
-      /* 
-        If we came to this point the AGGFN(DISTINCT) loose index scan
-        optimization is applicable 
-      */
+      Item_field* item= static_cast<Item_field*>(expr->real_item());
       if (out_args)
-        out_args->push_back((Item_field *) expr->real_item());
+        out_args->push_back(item);
+
+      cur_aggdistinct_fields.set_bit(item->field->field_index);
       result= true;
     }
+    /*
+      If there are multiple aggregate functions, make sure that they all
+      refer to exactly the same set of columns.
+    */
+    if (first_aggdistinct_fields.is_clear_all())
+      first_aggdistinct_fields.merge(cur_aggdistinct_fields);
+    else if (first_aggdistinct_fields != cur_aggdistinct_fields)
+      return false;
   }
+
   return result;
 }
 
diff --git a/sql/table.h b/sql/table.h
index 9a85dd83b13..5d4bb654263 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -894,6 +894,9 @@ enum index_hint_type
   INDEX_HINT_FORCE
 };
 
+/* Bitmap of table's fields */
+typedef Bitmap<MAX_FIELDS> Field_map;
+
 struct TABLE
 {
   TABLE() {}                               /* Remove gcc warning */
author	Neeraj Bisht <neeraj.x.bisht@oracle.com>	2013-05-13 17:15:25 +0530
committer	Neeraj Bisht <neeraj.x.bisht@oracle.com>	2013-05-13 17:15:25 +0530
commit	2812634b6c45b96bf3a0def5a6df4267304e8dca (patch)
tree	78c6d49a03f1f8f7216fcef2c311d2f47751503b /sql
parent	575559754d957e1119d48dc5cec7d7b94720aa61 (diff)
download	mariadb-git-2812634b6c45b96bf3a0def5a6df4267304e8dca.tar.gz