summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVarun Gupta <varun.gupta@mariadb.com>2018-12-07 02:12:22 +0530
committerVarun Gupta <varun.gupta@mariadb.com>2018-12-07 19:59:45 +0530
commit4886d14827c69877e8d089ae7c7f178a9a54ad7a (patch)
tree3e059163c76f6b52fae3b09d8e6cfee87da33814
parent12b1ba195cb0802053bc2fae3b507ec9721685f8 (diff)
downloadmariadb-git-4886d14827c69877e8d089ae7c7f178a9a54ad7a.tar.gz
MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
The problem here is EITS statistics does not calculate statistics for the partitions of the table. So a temporary solution would be to not read EITS statistics for partitioned tables. Also disabling reading of EITS for columns that participate in the partition list of a table.
-rw-r--r--mysql-test/r/partition.result100
-rw-r--r--mysql-test/t/partition.test61
-rw-r--r--sql/opt_range.cc19
-rw-r--r--sql/partition_info.cc17
-rw-r--r--sql/partition_info.h1
-rw-r--r--sql/sql_statistics.cc45
-rw-r--r--sql/sql_statistics.h1
7 files changed, 235 insertions, 9 deletions
diff --git a/mysql-test/r/partition.result b/mysql-test/r/partition.result
index c6669176b3d..6732782c5f7 100644
--- a/mysql-test/r/partition.result
+++ b/mysql-test/r/partition.result
@@ -2645,3 +2645,103 @@ Warnings:
Note 1517 Duplicate partition name p2
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
+#
+# MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
+#
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+part_key int,
+a int,
+b int
+) partition by list(part_key) (
+partition p0 values in (0),
+partition p1 values in (1),
+partition p2 values in (2),
+partition p3 values in (3),
+partition p4 values in (4)
+);
+insert into t2
+select mod(a,5), a/100, mod(a,5) from t1;
+set @save_use_stat_tables= @@use_stat_tables;
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+#
+# Tests using stats provided by the storage engine
+#
+explain extended select * from t2 where part_key=1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1)
+explain partitions select * from t2 where part_key=1;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
+explain extended select * from t2 where part_key in (1,2);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
+explain partitions select * from t2 where part_key in (1,2);
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 Using where
+explain extended select * from t2 where b=5;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`b` = 5)
+explain partitions select * from t2 where b=5;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p0,p1,p2,p3,p4 ALL NULL NULL NULL NULL 1000 Using where
+explain extended select * from t2 partition(p0) where b=1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` PARTITION (`p0`) where (`test`.`t2`.`b` = 1)
+set @save_histogram_size=@@histogram_size;
+set @@histogram_size=100;
+set @@use_stat_tables= PREFERABLY;
+set @@optimizer_use_condition_selectivity=4;
+analyze table t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status OK
+#
+# Tests using EITS
+#
+# filtered should be 100
+explain extended select * from t2 where part_key=1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1)
+explain partitions select * from t2 where part_key=1;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
+# filtered should be 100
+explain extended select * from t2 where part_key in (1,2);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
+explain partitions select * from t2 where part_key in (1,2);
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 Using where
+explain extended select * from t2 where b=5;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 19.80 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`b` = 5)
+explain partitions select * from t2 where b=5;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p0,p1,p2,p3,p4 ALL NULL NULL NULL NULL 1000 Using where
+explain extended select * from t2 partition(p0) where b=1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 200 19.80 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` PARTITION (`p0`) where (`test`.`t2`.`b` = 1)
+set @@use_stat_tables= @save_use_stat_tables;
+set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+set @@histogram_size= @save_histogram_size;
+drop table t0,t1,t2;
diff --git a/mysql-test/t/partition.test b/mysql-test/t/partition.test
index 1c8cd0375d6..b6a5db2db7c 100644
--- a/mysql-test/t/partition.test
+++ b/mysql-test/t/partition.test
@@ -2897,3 +2897,64 @@ EXECUTE stmt;
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
+--echo #
+--echo # MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
+--echo #
+
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+
+
+create table t2 (
+ part_key int,
+ a int,
+ b int
+) partition by list(part_key) (
+ partition p0 values in (0),
+ partition p1 values in (1),
+ partition p2 values in (2),
+ partition p3 values in (3),
+ partition p4 values in (4)
+);
+insert into t2
+select mod(a,5), a/100, mod(a,5) from t1;
+
+set @save_use_stat_tables= @@use_stat_tables;
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+--echo #
+--echo # Tests using stats provided by the storage engine
+--echo #
+explain extended select * from t2 where part_key=1;
+explain partitions select * from t2 where part_key=1;
+explain extended select * from t2 where part_key in (1,2);
+explain partitions select * from t2 where part_key in (1,2);
+explain extended select * from t2 where b=5;
+explain partitions select * from t2 where b=5;
+explain extended select * from t2 partition(p0) where b=1;
+
+
+set @save_histogram_size=@@histogram_size;
+set @@histogram_size=100;
+set @@use_stat_tables= PREFERABLY;
+set @@optimizer_use_condition_selectivity=4;
+analyze table t2;
+--echo #
+--echo # Tests using EITS
+--echo #
+--echo # filtered should be 100
+explain extended select * from t2 where part_key=1;
+explain partitions select * from t2 where part_key=1;
+--echo # filtered should be 100
+explain extended select * from t2 where part_key in (1,2);
+explain partitions select * from t2 where part_key in (1,2);
+explain extended select * from t2 where b=5;
+explain partitions select * from t2 where b=5;
+explain extended select * from t2 partition(p0) where b=1;
+
+set @@use_stat_tables= @save_use_stat_tables;
+set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+set @@histogram_size= @save_histogram_size;
+drop table t0,t1,t2;
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index a3943cbe3ff..005ae92a665 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3322,14 +3322,17 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
{
Field **field_ptr;
TABLE *table= param->table;
+ partition_info *part_info= NULL;
+ #ifdef WITH_PARTITION_STORAGE_ENGINE
+ part_info= table->part_info;
+ #endif
uint parts= 0;
for (field_ptr= table->field; *field_ptr; field_ptr++)
{
- Column_statistics* col_stats= (*field_ptr)->read_stats;
- if (bitmap_is_set(used_fields, (*field_ptr)->field_index)
- && col_stats && !col_stats->no_stat_values_provided()
- && !((*field_ptr)->type() == MYSQL_TYPE_GEOMETRY))
+ Field *field= *field_ptr;
+ if (bitmap_is_set(used_fields, field->field_index) &&
+ is_eits_usable(field))
parts++;
}
@@ -3347,12 +3350,10 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
uint max_key_len= 0;
for (field_ptr= table->field; *field_ptr; field_ptr++)
{
- if (bitmap_is_set(used_fields, (*field_ptr)->field_index))
+ Field *field= *field_ptr;
+ if (bitmap_is_set(used_fields, field->field_index))
{
- Field *field= *field_ptr;
- Column_statistics* col_stats= field->read_stats;
- if (field->type() == MYSQL_TYPE_GEOMETRY ||
- !col_stats || col_stats->no_stat_values_provided())
+ if (!is_eits_usable(field))
continue;
uint16 store_length;
diff --git a/sql/partition_info.cc b/sql/partition_info.cc
index 52bda560c1c..9d7d0d92686 100644
--- a/sql/partition_info.cc
+++ b/sql/partition_info.cc
@@ -3164,6 +3164,23 @@ void partition_info::print_debug(const char *str, uint *value)
DBUG_PRINT("info", ("parser: %s", str));
DBUG_VOID_RETURN;
}
+
+bool partition_info::field_in_partition_expr(Field *field) const
+{
+ uint i;
+ for (i= 0; i < num_part_fields; i++)
+ {
+ if (field->eq(part_field_array[i]))
+ return TRUE;
+ }
+ for (i= 0; i < num_subpart_fields; i++)
+ {
+ if (field->eq(subpart_field_array[i]))
+ return TRUE;
+ }
+ return FALSE;
+}
+
#else /* WITH_PARTITION_STORAGE_ENGINE */
/*
For builds without partitioning we need to define these functions
diff --git a/sql/partition_info.h b/sql/partition_info.h
index f250c5496bf..10b8954ace7 100644
--- a/sql/partition_info.h
+++ b/sql/partition_info.h
@@ -384,6 +384,7 @@ private:
bool is_full_part_expr_in_fields(List<Item> &fields);
public:
bool has_unique_name(partition_element *element);
+ bool field_in_partition_expr(Field *field) const;
};
uint32 get_next_partition_id_range(struct st_partition_iter* part_iter);
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index cb75a5c2176..0c359a29431 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -30,6 +30,7 @@
#include "opt_range.h"
#include "my_atomic.h"
#include "sql_show.h"
+#include "sql_partition.h"
/*
The system variable 'use_stat_tables' can take one of the
@@ -3589,6 +3590,22 @@ void set_statistics_for_table(THD *thd, TABLE *table)
(use_stat_table_mode <= COMPLEMENTARY ||
!table->stats_is_read || read_stats->cardinality_is_null) ?
table->file->stats.records : read_stats->cardinality;
+
+ /*
+ For partitioned table, EITS statistics is based on data from all partitions.
+
+ On the other hand, Partition Pruning figures which partitions will be
+ accessed and then computes the estimate of rows in used_partitions.
+
+ Use the estimate from Partition Pruning as it is typically more precise.
+ Ideally, EITS should provide per-partition statistics but this is not
+ implemented currently.
+ */
+ #ifdef WITH_PARTITION_STORAGE_ENGINE
+ if (table->part_info)
+ table->used_stat_records= table->file->stats.records;
+ #endif
+
KEY *key_info, *key_info_end;
for (key_info= table->key_info, key_info_end= key_info+table->s->keys;
key_info < key_info_end; key_info++)
@@ -3904,3 +3921,31 @@ bool is_stat_table(const char *db, const char *table)
}
return false;
}
+
+/*
+ Check wheter we can use EITS statistics for a field or not
+
+ TRUE : Use EITS for the columns
+ FALSE: Otherwise
+*/
+
+bool is_eits_usable(Field *field)
+{
+ partition_info *part_info= NULL;
+ #ifdef WITH_PARTITION_STORAGE_ENGINE
+ part_info= field->table->part_info;
+ #endif
+ /*
+ (1): checks if we have EITS statistics for a particular column
+ (2): Don't use EITS for GEOMETRY columns
+ (3): Disabling reading EITS statistics for columns involved in the
+ partition list of a table. We assume the selecticivity for
+ such columns would be handled during partition pruning.
+ */
+ Column_statistics* col_stats= field->read_stats;
+ if (col_stats && !col_stats->no_stat_values_provided() && //(1)
+ field->type() != MYSQL_TYPE_GEOMETRY && //(2)
+ (!part_info || !part_info->field_in_partition_expr(field))) //(3)
+ return TRUE;
+ return FALSE;
+}
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index f28d56e4a69..a891bef3164 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -109,6 +109,7 @@ double get_column_range_cardinality(Field *field,
key_range *max_endp,
uint range_flag);
bool is_stat_table(const char *db, const char *table);
+bool is_eits_usable(Field* field);
class Histogram
{