author    Monty <monty@mariadb.org>  2022-10-04 12:59:43 +0300
committer Monty <monty@mariadb.org>  2022-10-04 16:11:19 +0300
commit    a35f715f440eb9ce93e4245cb1e4e5bc6f6972a2 (patch)
tree      1efc35fe4ccfa66afdd067483d9ceff74830d105
parent    a5d799298aa88d78998bc964743964abc70b84bd (diff)
Changed aggregate distinct optimization with indexes to be cost based.
Before this patch, the cost of an aggregate distinct (COUNT(DISTINCT ...)) was set to 0 if the values were part of an index and the cost of grouping was higher than the best cost so far. This was shown in EXPLAIN as "Using index for group-by (scanning)".

This patch fixes it by calculating the cost of the aggregate distinct and using index scanning only if that cost is better than the group-by optimization.

Things taken into account:
- When using aggregate distinct on an index, the filtering is done before the row is checked against the WHERE clause, so the WHERE cost is lower.
- When comparing costs against an aggregate distinct plan, we add to the competing plan the cost of doing the duplicate filtering later at the SQL level.
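As a rough sketch of the second point, the comparison in SQL_SELECT::test_quick_select() (first hunk below) now charges the competing plan for SQL-level duplicate removal before comparing costs. The following standalone C++ fragment models that decision; DUPLICATE_REMOVAL_COST and all names here are illustrative stand-ins, not the MariaDB definitions:

#include <cstdint>

// Illustrative stand-in for the new DUPLICATE_REMOVAL_COST define.
static const double DUPLICATE_REMOVAL_COST= 0.01;

// Decide whether the group-min-max plan should replace the best plan
// found so far.  When the group plan already eliminates duplicates for
// an aggregate distinct, the competing plan is charged the per-row
// cost of doing that elimination at the SQL level instead.
static bool choose_group_plan(double group_read_cost,
                              double best_read_time,
                              uint64_t rows_to_deduplicate,
                              bool agg_distinct_index_scan)
{
  double duplicate_removal_cost= 0;
  if (agg_distinct_index_scan)
    duplicate_removal_cost= DUPLICATE_REMOVAL_COST * rows_to_deduplicate;
  return group_read_cost < best_read_time + duplicate_removal_cost;
}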
-rw-r--r--  sql/opt_range.cc       | 79
-rw-r--r--  sql/optimizer_costs.h  |  2
2 files changed, 64 insertions(+), 17 deletions(-)
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 1f6675b8c15..b5de9887e36 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -2461,20 +2461,21 @@ void TRP_INDEX_MERGE::trace_basic_info(PARAM *param,
class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
{
private:
- bool have_min, have_max, have_agg_distinct;
- KEY_PART_INFO *min_max_arg_part;
uint group_prefix_len;
uint used_key_parts;
uint group_key_parts;
- KEY *index_info;
uint index;
uint key_infix_len;
+ uint param_idx; /* Index of used key in param->key. */
uchar key_infix[MAX_KEY_LENGTH];
+ KEY *index_info;
+ KEY_PART_INFO *min_max_arg_part;
SEL_TREE *range_tree; /* Represents all range predicates in the query. */
SEL_ARG *index_tree; /* The SEL_ARG sub-tree corresponding to index_info. */
- uint param_idx; /* Index of used key in param->key. */
- bool is_index_scan; /* Use index_next() instead of random read */
+ bool have_min, have_max;
public:
+ bool have_agg_distinct;
+ bool is_index_scan; /* Use index_next() instead of random read */
/* Number of records selected by the ranges in index_tree. */
ha_rows quick_prefix_records;
public:
@@ -2487,13 +2488,14 @@ public:
uchar *key_infix_arg,
SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
uint param_idx_arg, ha_rows quick_prefix_records_arg)
- : have_min(have_min_arg), have_max(have_max_arg),
+ : group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
+ group_key_parts(group_key_parts_arg),
+ index(index_arg), key_infix_len(key_infix_len_arg), param_idx(param_idx_arg),
+ index_info(index_info_arg), min_max_arg_part(min_max_arg_part_arg),
+ range_tree(tree_arg), index_tree(index_tree_arg),
+ have_min(have_min_arg), have_max(have_max_arg),
have_agg_distinct(have_agg_distinct_arg),
- min_max_arg_part(min_max_arg_part_arg),
- group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
- group_key_parts(group_key_parts_arg), index_info(index_info_arg),
- index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg),
- index_tree(index_tree_arg), param_idx(param_idx_arg), is_index_scan(FALSE),
+ is_index_scan(FALSE),
quick_prefix_records(quick_prefix_records_arg)
{
if (key_infix_len)
@@ -3060,6 +3062,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
if (!only_single_index_range_scan)
{
TRP_GROUP_MIN_MAX *group_trp;
+ double duplicate_removal_cost= 0;
if (tree)
restore_nonrange_trees(&param, tree, backup_keys);
if ((group_trp= get_best_group_min_max(&param, tree, read_time)))
@@ -3078,9 +3081,31 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
if (unlikely(thd->trace_started()))
group_trp->trace_basic_info(&param, &grp_summary);

- if (group_trp->read_cost < best_read_time || force_group_by)
+ if (group_trp->have_agg_distinct && group_trp->is_index_scan)
+ {
+ /*
+ We are optimizing a distinct aggregate, like
+ SELECT count(DISTINCT a,b,c) FROM ...
+
+ The group cost includes removal of the distinct, so to be able
+ to compare costs we add a small cost to the original cost that
+ stands for the extra work we have to do outside of the engine
+ to do duplicate elimination for each output row if we are not
+ using the grouping code.
+ */
+ duplicate_removal_cost= (DUPLICATE_REMOVAL_COST *
+ (best_trp ? best_trp->records :
+ table_records));
+ }
+ if (group_trp->read_cost < best_read_time + duplicate_removal_cost ||
+ force_group_by)
{
- grp_summary.add("chosen", true);
+ if (thd->trace_started())
+ {
+ if (duplicate_removal_cost)
+ grp_summary.add("duplicate_removal_cost", duplicate_removal_cost);
+ grp_summary.add("chosen", true);
+ }
best_trp= group_trp;
}
else
@@ -14484,11 +14509,31 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
read_plan->read_cost= best_read_cost;
read_plan->records= best_records;

- if (read_time < best_read_cost && is_agg_distinct)
+ if (is_agg_distinct)
{
- trace_group.add("index_scan", true);
- read_plan->read_cost= 0;
- read_plan->use_index_scan();
+ double best_cost, duplicate_removal_cost;
+ ulonglong records;
+ handler *file= table->file;
+
+ /* Calculate cost of distinct scan on index */
+ if (best_index_tree && read_plan->quick_prefix_records)
+ records= read_plan->quick_prefix_records;
+ else
+ records= table->stat_records();
+
+ best_cost= file->cost(file->ha_key_scan_time(index, records));
+ /* We only have 'best_records' left after duplicate elimination */
+ best_cost+= best_records * WHERE_COST_THD(thd);
+ duplicate_removal_cost= (DUPLICATE_REMOVAL_COST * records);
+
+ if (best_cost < read_plan->read_cost + duplicate_removal_cost)
+ {
+ read_plan->read_cost= best_cost;
+ read_plan->use_index_scan();
+ trace_group.
+ add("scan_cost", best_cost).
+ add("index_scan", true);
+ }
}

DBUG_PRINT("info",
diff --git a/sql/optimizer_costs.h b/sql/optimizer_costs.h
index 698cdbfe41e..3b2300b9019 100644
--- a/sql/optimizer_costs.h
+++ b/sql/optimizer_costs.h
@@ -87,6 +87,8 @@ extern OPTIMIZER_COSTS heap_optimizer_costs, tmp_table_optimizer_costs;
#define TABLE_SCAN_SETUP_COST optimizer_scan_setup_cost
#define TABLE_SCAN_SETUP_COST_THD(THD) (THD)->variables.optimizer_scan_setup_cost
#define INDEX_SCAN_SETUP_COST optimizer_scan_setup_cost/2
+/* Cost for doing duplicate removal in test_quick_select */
+#define DUPLICATE_REMOVAL_COST default_optimizer_costs.key_copy_cost

/* The default fill factor of a (b-tree) index block is assumed to be 0.75 */
#define INDEX_BLOCK_FILL_FACTOR_DIV 3
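
For the first point above, the new code in get_best_group_min_max() prices a plain distinct index scan (only the deduplicated rows reach the WHERE clause) and keeps it only when it beats the group-by plan plus SQL-level duplicate removal. A standalone sketch of that comparison, with assumed stand-in values for DUPLICATE_REMOVAL_COST and the session WHERE cost; 'key_scan_cost' stands in for file->cost(file->ha_key_scan_time(index, records)):

#include <cstdint>

// Illustrative stand-ins; the real values come from optimizer_costs.h
// and the per-session optimizer cost variables.
static const double DUPLICATE_REMOVAL_COST= 0.01;
static const double WHERE_COST= 0.032;

// Mirrors the comparison in get_best_group_min_max(): 'records' is the
// number of index entries scanned, 'best_records' the rows remaining
// after duplicate elimination.
static bool prefer_distinct_index_scan(double key_scan_cost,
                                       uint64_t records,
                                       uint64_t best_records,
                                       double group_read_cost)
{
  // Only the deduplicated rows are checked against the WHERE clause.
  double best_cost= key_scan_cost + best_records * WHERE_COST;
  // Eliminating duplicates from all scanned entries costs a small
  // amount per row.
  double duplicate_removal_cost= DUPLICATE_REMOVAL_COST * records;
  return best_cost < group_read_cost + duplicate_removal_cost;
}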