diff options
author | Varun Gupta <varun.gupta@mariadb.com> | 2020-05-20 18:59:52 +0530 |
---|---|---|
committer | Varun Gupta <varun.gupta@mariadb.com> | 2020-11-26 01:20:49 +0530 |
commit | efb171c2eac06508489804601f20c702bca1954c (patch) | |
tree | 7da131efa4776cce13a51b632dad3f6df961ec5e /sql/field.cc | |
parent | c498250888ec126fddda2867d1239b2a7734482f (diff) | |
download | mariadb-git-10.6-mdev22360.tar.gz |
MDEV-22360: Sufficient conditions for accurate calculation of join cardinality (branch: 10.6-mdev22360)
The aim of this task is to check whether the estimate of join cardinality is accurate or not.
The check for whether we have an accurate estimate of the join cardinality is a
simple one: we have to walk over the WHERE clause.
The approach can be broken into 2 cases:
Case 1: WHERE clause is an AND conjunct
For an AND item at the top level, we need to walk over all the top-level conjuncts and call walk
individually on each of them. This is done because the conjuncts of a top-level AND item
may have accurate selectivities even if their predicates refer to different columns.
Eg: t1.a > 10 and t2.a < 5.
For this AND item we will have accurate selectivities.
For AND items that are not at the top level, the entire item needs to resolve to one column.
Eg: t1.a = t2.a AND ( (t1.a > 5 AND t2.a < 10) OR t1.a <= 0)
Case 2:
2a) OR item
For an OR item at the top level, we need to make sure that all the columns inside the OR
item resolve to one column, directly or indirectly.
This must hold for an OR item even if it is not at the
top level.
Eg: (t1.a > 5 or t1.a < 0);
2b) Single predicate at the top level
Eg:
t1.a = t2.a [ For this case we need to make sure we know the number of distinct values for t1.a and t2.a ]
t1.a > 5 [ sargable predicate, get the estimate from the range optimizer ]
We need to make sure that for the predicates in the WHERE clause we have estimates either
from the first component of an index or from EITS (engine-independent table statistics).
These checks are implemented in the callback
function passed to the walk function.
Diffstat (limited to 'sql/field.cc')
-rw-r--r-- | sql/field.cc | 200 |
1 files changed, 200 insertions, 0 deletions
diff --git a/sql/field.cc b/sql/field.cc index fe3aebce05d..91dc1deb360 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -36,6 +36,8 @@ #include "tztime.h" // struct Time_zone #include "filesort.h" // change_double_for_sort #include "log_event.h" // class Table_map_log_event +#include "sql_statistics.h" +#include "sql_partition.h" #include <m_ctype.h> // Maximum allowed exponent value for converting string to decimal @@ -1851,6 +1853,7 @@ Field::Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, field_index= 0; cond_selectivity= 1.0; next_equal_field= NULL; + stats_available= 0; } @@ -11358,6 +11361,203 @@ void Field::print_key_value_binary(String *out, const uchar* key, uint32 length) } +/* + @brief + Check if statistics for a column are available via keys + + @details + If the column is the first component of a key, then statistics + for the column are available from the range optimizer. + Sets the bit in Field::stats_table + a) NDV is available + b) Statistics are available for the non-const argument of a + range predicate +*/ + +void Field::statistics_available_via_keys() +{ + uint key; + key_map::Iterator it(key_start); + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + KEY *keyinfo= table->key_info + key; + if (keyinfo->usable_key_parts == 1 && + field_index + 1 == keyinfo->key_part->fieldnr) + { + stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE); + return; + } + } +} + + +/* + @brief + Check if statistics for a column are available via stat tables +*/ + +void Field::statistics_available_via_stat_tables() +{ + THD *thd= table->in_use; + if (!(check_eits_preferred(thd) && + thd->variables.optimizer_use_condition_selectivity > 2)) + return; + if (!(table->stats_is_read && + read_stats && !read_stats->no_stat_values_provided())) + return; + stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE); + if (!read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY)) + stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); +} + + 
+/* + @brief + Check if statistics for a column are available via indexes or stat tables + + @retval + TRUE : statistics available for the column + FALSE : OTHERWISE +*/ + +bool Field::is_statistics_available_for_range_predicates() +{ + if (!(stats_available & (1 << STATISTICS_CACHED))) + { + is_statistics_available(); + stats_available|= (1 << STATISTICS_CACHED); + } + return (stats_available & (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE)); +} + + +void Field::is_statistics_available() +{ + statistics_available_via_keys(); + statistics_available_via_stat_tables(); + is_ndv_available_via_keys(); + is_ndv_available_via_stat_tables(); +} + +/* + @brief + Check if ndv for a column are available via indexes or stat tables + + @retval + TRUE : ndv available for the column + FALSE : OTHERWISE +*/ + +bool Field::is_ndv_available() +{ + if (!(stats_available & (1 << STATISTICS_CACHED))) + { + is_statistics_available(); + stats_available|= (1 << STATISTICS_CACHED); + } + return (stats_available & (1 << STATISTICS_FOR_NDV_AVAILABLE)); +} + + +/* + @brief + Check if number of distinct values(ndv) for a column are available via keys + + @retval + TRUE : ndv available from keys + FALSE : otherwise +*/ + +bool Field::is_ndv_available_via_keys() +{ + uint key; + key_map::Iterator it(key_start); + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + KEY *keyinfo= table->key_info + key; + if (is_first_component_of_key(keyinfo) && keyinfo->actual_rec_per_key(0)) + { + stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); + return true; + } + } + return false; +} + + +/* + @brief + Check if ndv for a column are available via statistical tables + + @retval + TRUE : ndv available from statistical tables + FALSE : otherwise +*/ + +bool Field::is_ndv_available_via_stat_tables() +{ + if (!check_eits_preferred(table->in_use)) + return false; + if (!(read_stats && !read_stats->no_stat_values_provided() && + !read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY))) + return false; + 
stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); + return true; +} + + +/* + @brief + Checks if a field is the first component of a given key + + @param + key given key + + @retval + TRUE : field is the first component of the given key + FALSE : otherwise +*/ + +bool Field::is_first_component_of_key(KEY *key) +{ + DBUG_ASSERT(key->usable_key_parts >= 1); + return field_index + 1 == key->key_part->fieldnr; +} + + +/* + Check whether EITS statistics for a field are usable or not + + TRUE : Use EITS for the columns + FALSE: Otherwise +*/ + +bool Field::is_eits_usable() +{ + // check if column_statistics was allocated for this field + if (!read_stats) + return false; + + DBUG_ASSERT(table->stats_is_read); + + /* + (1): checks if we have EITS statistics for a particular column + (2): Don't use EITS for GEOMETRY columns + (3): Disabling reading EITS statistics for columns involved in the + partition list of a table. We assume the selectivity for + such columns would be handled during partition pruning. + */ + + return !read_stats->no_stat_values_provided() && //(1) + type() != MYSQL_TYPE_GEOMETRY && //(2) +#ifdef WITH_PARTITION_STORAGE_ENGINE + (!table->part_info || + !table->part_info->field_in_partition_expr(this)) && //(3) +#endif + true; +} + + Virtual_column_info* Virtual_column_info::clone(THD *thd) { Virtual_column_info* dst= new (thd->mem_root) Virtual_column_info(*this); |