From f86c438f7caa0212c6d029e89e748953a1fab1cf Mon Sep 17 00:00:00 2001
From: Mattias Jonsson <mattias.jonsson@sun.com>
Date: Tue, 17 Nov 2009 20:02:16 +0100
Subject: Bug#48846: Too much time spent in ha_partition::records_in_range if
 not able to prune

Problem was that ha_partition::records_in_range called
records_in_range for all non pruned partitions, even if
an estimate should be given.

Solution is to only use 1/3 of the partitions (up to 10) for
records_in_range and estimate the total from this subset.
(And continue until a non zero return value from the called
partitions records_in_range is given, since 0 means no rows
will match.)
---
 sql/ha_partition.cc | 144 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 93 insertions(+), 51 deletions(-)

(limited to 'sql/ha_partition.cc')

diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index 451631ff373..b854e270029 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -5747,6 +5747,23 @@ const key_map *ha_partition::keys_to_use_for_scanning()
   DBUG_RETURN(m_file[0]->keys_to_use_for_scanning());
 }
 
+#define MAX_PARTS_FOR_OPTIMIZER_CALLS 10
+/*
+  Prepare start variables for estimating optimizer costs.
+
+  @param[out] num_used_parts  Number of partitions after pruning.
+  @param[out] check_min_num   Number of partitions to call.
+  @param[out] first           first used partition.
+*/
+void ha_partition::partitions_optimizer_call_preparations(uint *first,
+                                                          uint *num_used_parts,
+                                                          uint *check_min_num)
+{
+  *first= bitmap_get_first_set(&(m_part_info->used_partitions));
+  *num_used_parts= bitmap_bits_set(&(m_part_info->used_partitions));
+  *check_min_num= min(MAX_PARTS_FOR_OPTIMIZER_CALLS, *num_used_parts);
+}
+
 
 /*
   Return time for a scan of the table
@@ -5760,43 +5777,67 @@ const key_map *ha_partition::keys_to_use_for_scanning()
 
 double ha_partition::scan_time()
 {
-  double scan_time= 0;
-  handler **file;
+  double scan_time= 0.0;
+  uint first, part_id, num_used_parts, check_min_num, partitions_called= 0;
   DBUG_ENTER("ha_partition::scan_time");
 
-  for (file= m_file; *file; file++)
-    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
-      scan_time+= (*file)->scan_time();
+  partitions_optimizer_call_preparations(&first, &num_used_parts, &check_min_num);
+  for (part_id= first; partitions_called < num_used_parts ; part_id++)
+  {
+    if (!bitmap_is_set(&(m_part_info->used_partitions), part_id))
+      continue;
+    scan_time+= m_file[part_id]->scan_time();
+    partitions_called++;
+    if (partitions_called >= check_min_num && scan_time != 0.0)
+    {
+      DBUG_RETURN(scan_time *
+                      (double) num_used_parts / (double) partitions_called);
+    }
+  }
   DBUG_RETURN(scan_time);
 }
 
 
 /*
-  Get time to read
+  Estimate rows for records_in_range or estimate_rows_upper_bound.
 
-  SYNOPSIS
-    read_time()
-    index                Index number used
-    ranges               Number of ranges
-    rows                 Number of rows
-
-  RETURN VALUE
-    time for read
+  @param is_records_in_range  call records_in_range instead of
+                              estimate_rows_upper_bound.
+  @param inx                  (only for records_in_range) index to use.
+  @param min_key              (only for records_in_range) start of range.
+  @param max_key              (only for records_in_range) end of range.
 
-  DESCRIPTION
-    This will be optimised later to include whether or not the index can
-    be used with partitioning. To achieve we need to add another parameter
-    that specifies how many of the index fields that are bound in the ranges.
-    Possibly added as a new call to handlers.
+  @return Number of rows or HA_POS_ERROR.
 */
-
-double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
+ha_rows ha_partition::estimate_rows(bool is_records_in_range, uint inx,
+                                    key_range *min_key, key_range *max_key)
 {
-  DBUG_ENTER("ha_partition::read_time");
+  ha_rows rows, estimated_rows= 0;
+  uint first, part_id, num_used_parts, check_min_num, partitions_called= 0;
+  DBUG_ENTER("ha_partition::records_in_range");
 
-  DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
+  partitions_optimizer_call_preparations(&first, &num_used_parts, &check_min_num);
+  for (part_id= first; partitions_called < num_used_parts ; part_id++)
+  {
+    if (!bitmap_is_set(&(m_part_info->used_partitions), part_id))
+      continue;
+    if (is_records_in_range)
+      rows= m_file[part_id]->records_in_range(inx, min_key, max_key);
+    else
+      rows= m_file[part_id]->estimate_rows_upper_bound();
+    if (rows == HA_POS_ERROR)
+      DBUG_RETURN(HA_POS_ERROR);
+    estimated_rows+= rows;
+    partitions_called++;
+    if (partitions_called >= check_min_num && estimated_rows)
+    {
+      DBUG_RETURN(estimated_rows * num_used_parts / partitions_called);
+    }
+  }
+  DBUG_RETURN(estimated_rows);
 }
 
+
 /*
   Find number of records in a range
 
@@ -5824,22 +5865,9 @@ double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
 ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
 				       key_range *max_key)
 {
-  handler **file;
-  ha_rows in_range= 0;
   DBUG_ENTER("ha_partition::records_in_range");
 
-  file= m_file;
-  do
-  {
-    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
-    {
-      ha_rows tmp_in_range= (*file)->records_in_range(inx, min_key, max_key);
-      if (tmp_in_range == HA_POS_ERROR)
-        DBUG_RETURN(tmp_in_range);
-      in_range+= tmp_in_range;
-    }
-  } while (*(++file));
-  DBUG_RETURN(in_range);
+  DBUG_RETURN(estimate_rows(TRUE, inx, min_key, max_key));
 }
 
 
@@ -5855,22 +5883,36 @@ ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
 
 ha_rows ha_partition::estimate_rows_upper_bound()
 {
-  ha_rows rows, tot_rows= 0;
-  handler **file;
   DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
 
-  file= m_file;
-  do
-  {
-    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
-    {
-      rows= (*file)->estimate_rows_upper_bound();
-      if (rows == HA_POS_ERROR)
-        DBUG_RETURN(HA_POS_ERROR);
-      tot_rows+= rows;
-    }
-  } while (*(++file));
-  DBUG_RETURN(tot_rows);
+  DBUG_RETURN(estimate_rows(FALSE, 0, NULL, NULL));
+}
+
+
+/*
+  Get time to read
+
+  SYNOPSIS
+    read_time()
+    index                Index number used
+    ranges               Number of ranges
+    rows                 Number of rows
+
+  RETURN VALUE
+    time for read
+
+  DESCRIPTION
+    This will be optimised later to include whether or not the index can
+    be used with partitioning. To achieve we need to add another parameter
+    that specifies how many of the index fields that are bound in the ranges.
+    Possibly added as a new call to handlers.
+*/
+
+double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
+{
+  DBUG_ENTER("ha_partition::read_time");
+
+  DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
 }
 
 
-- 
cgit v1.2.1