summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
Diffstat (limited to 'sql')
-rw-r--r--sql/field.cc200
-rw-r--r--sql/field.h22
-rw-r--r--sql/item.cc213
-rw-r--r--sql/item.h40
-rw-r--r--sql/item_cmpfunc.cc113
-rw-r--r--sql/item_cmpfunc.h10
-rw-r--r--sql/item_func.h6
-rw-r--r--sql/opt_range.cc4
-rw-r--r--sql/sql_select.cc37
-rw-r--r--sql/sql_select.h1
-rw-r--r--sql/sql_statistics.cc38
-rw-r--r--sql/sql_statistics.h1
-rw-r--r--sql/table.cc1
13 files changed, 646 insertions, 40 deletions
diff --git a/sql/field.cc b/sql/field.cc
index fe3aebce05d..91dc1deb360 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -36,6 +36,8 @@
#include "tztime.h" // struct Time_zone
#include "filesort.h" // change_double_for_sort
#include "log_event.h" // class Table_map_log_event
+#include "sql_statistics.h"
+#include "sql_partition.h"
#include <m_ctype.h>
// Maximum allowed exponent value for converting string to decimal
@@ -1851,6 +1853,7 @@ Field::Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg,
field_index= 0;
cond_selectivity= 1.0;
next_equal_field= NULL;
+ stats_available= 0;
}
@@ -11358,6 +11361,203 @@ void Field::print_key_value_binary(String *out, const uchar* key, uint32 length)
}
+/*
+  @brief
+    Record whether statistics for range predicates over this column can be
+    taken from an index
+
+  @details
+    Walks over the keys listed in Field::key_start. As soon as one of them
+    has exactly one usable key part and that key part is this column, the
+    bit STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE is set in
+    Field::stats_available and the search stops.
+
+    NOTE(review): only keys with usable_key_parts == 1 qualify here, while
+    is_ndv_available_via_keys() does not require a single usable key part;
+    confirm that the stricter test is intended.
+*/
+
+void Field::statistics_available_via_keys()
+{
+  key_map::Iterator key_it(key_start);
+  for (uint key_no= key_it++;
+       key_no != key_map::Iterator::BITMAP_END;
+       key_no= key_it++)
+  {
+    KEY *candidate= table->key_info + key_no;
+    if (candidate->usable_key_parts != 1)
+      continue;
+    if (candidate->key_part->fieldnr != field_index + 1)
+      continue;
+    stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE);
+    return;
+  }
+}
+
+
+/*
+  @brief
+    Record which statistics for this column can be taken from the
+    statistical tables
+
+  @details
+    Nothing is recorded unless check_eits_preferred() holds for the
+    session, optimizer_use_condition_selectivity is greater than 2, the
+    statistics of the table have been read and read_stats carries values.
+    When all of that holds, STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE is
+    set in Field::stats_available; STATISTICS_FOR_NDV_AVAILABLE is set as
+    well when the avg_frequency value is not NULL.
+*/
+
+void Field::statistics_available_via_stat_tables()
+{
+  THD *session= table->in_use;
+  if (!check_eits_preferred(session) ||
+      session->variables.optimizer_use_condition_selectivity <= 2)
+    return;
+  if (!table->stats_is_read || !read_stats ||
+      read_stats->no_stat_values_provided())
+    return;
+
+  stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE);
+  if (read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY))
+    return;
+  stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+}
+
+
+/*
+  @brief
+    Tell whether statistics usable for range predicates exist for the column
+
+  @details
+    When the STATISTICS_CACHED bit of Field::stats_available is not set yet,
+    the availability bits are computed by is_statistics_available() and the
+    STATISTICS_CACHED bit is raised so that the work is not repeated.
+
+  @retval
+    TRUE  : statistics are available for the column
+    FALSE : otherwise
+*/
+
+bool Field::is_statistics_available_for_range_predicates()
+{
+  const uint cached_flag= 1 << STATISTICS_CACHED;
+  if ((stats_available & cached_flag) == 0)
+  {
+    is_statistics_available();
+    stats_available|= cached_flag;
+  }
+  const uint range_flag= 1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE;
+  return (stats_available & range_flag) != 0;
+}
+
+
+/*
+  Compute the availability bits of Field::stats_available from both
+  sources: keys first, then statistical tables.
+*/
+void Field::is_statistics_available()
+{
+  /* statistics for range predicates */
+  statistics_available_via_keys();
+  statistics_available_via_stat_tables();
+  /* number of distinct values; the bits are set as a side effect */
+  (void) is_ndv_available_via_keys();
+  (void) is_ndv_available_via_stat_tables();
+}
+
+/*
+  @brief
+    Tell whether the number of distinct values (ndv) is known for the column
+
+  @details
+    When the STATISTICS_CACHED bit of Field::stats_available is not set yet,
+    the availability bits are computed by is_statistics_available() and the
+    STATISTICS_CACHED bit is raised so that the work is not repeated.
+
+  @retval
+    TRUE  : ndv is available for the column
+    FALSE : otherwise
+*/
+
+bool Field::is_ndv_available()
+{
+  const uint cached_flag= 1 << STATISTICS_CACHED;
+  if ((stats_available & cached_flag) == 0)
+  {
+    is_statistics_available();
+    stats_available|= cached_flag;
+  }
+  const uint ndv_flag= 1 << STATISTICS_FOR_NDV_AVAILABLE;
+  return (stats_available & ndv_flag) != 0;
+}
+
+
+/*
+  @brief
+    Check if the number of distinct values (ndv) of the column is known
+    from a key
+
+  @details
+    Looks for a key in Field::key_start whose first component is this
+    column and whose actual_rec_per_key(0) is non-zero. On success the bit
+    STATISTICS_FOR_NDV_AVAILABLE is set in Field::stats_available.
+
+  @retval
+    TRUE  : ndv available from keys
+    FALSE : otherwise
+*/
+
+bool Field::is_ndv_available_via_keys()
+{
+  key_map::Iterator key_it(key_start);
+  for (uint key_no= key_it++;
+       key_no != key_map::Iterator::BITMAP_END;
+       key_no= key_it++)
+  {
+    KEY *candidate= table->key_info + key_no;
+    if (!is_first_component_of_key(candidate) ||
+        !candidate->actual_rec_per_key(0))
+      continue;
+    stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+    return true;
+  }
+  return false;
+}
+
+
+/*
+  @brief
+    Check if the number of distinct values (ndv) of the column is known
+    from the statistical tables
+
+  @details
+    Requires check_eits_preferred() to hold for the session, read_stats to
+    carry values and the avg_frequency value not to be NULL. On success the
+    bit STATISTICS_FOR_NDV_AVAILABLE is set in Field::stats_available.
+
+  @retval
+    TRUE  : ndv available from statistical tables
+    FALSE : otherwise
+*/
+
+bool Field::is_ndv_available_via_stat_tables()
+{
+  if (!check_eits_preferred(table->in_use))
+    return false;
+  if (!read_stats ||
+      read_stats->no_stat_values_provided() ||
+      read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY))
+    return false;
+  stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+  return true;
+}
+
+
+/*
+  @brief
+    Check whether this field is the leading component of a key
+
+  @param
+    key   the key to inspect; it is expected to have at least one
+          usable key part
+
+  @retval
+    TRUE  : field is the first component of the given key
+    FALSE : otherwise
+*/
+
+bool Field::is_first_component_of_key(KEY *key)
+{
+  DBUG_ASSERT(key->usable_key_parts >= 1);
+  /* fieldnr is compared against field_index + 1 */
+  const bool leads_key= (key->key_part->fieldnr == field_index + 1);
+  return leads_key;
+}
+
+
+/*
+  Check whether EITS statistics for a field are usable or not
+
+  The statistics are rejected when
+    (1) no column statistics object was allocated for the field, or it
+        carries no values;
+    (2) the column is of GEOMETRY type;
+    (3) the column is involved in the partition expression of the table.
+        The selectivity for such columns is assumed to be handled during
+        partition pruning.
+
+  TRUE : Use EITS for the column
+  FALSE: Otherwise
+*/
+
+bool Field::is_eits_usable()
+{
+  if (!read_stats)
+    return false;                                           // (1)
+
+  DBUG_ASSERT(table->stats_is_read);
+
+  if (read_stats->no_stat_values_provided())
+    return false;                                           // (1)
+  if (type() == MYSQL_TYPE_GEOMETRY)
+    return false;                                           // (2)
+#ifdef WITH_PARTITION_STORAGE_ENGINE
+  if (table->part_info &&
+      table->part_info->field_in_partition_expr(this))
+    return false;                                           // (3)
+#endif
+  return true;
+}
+
+
Virtual_column_info* Virtual_column_info::clone(THD *thd)
{
Virtual_column_info* dst= new (thd->mem_root) Virtual_column_info(*this);
diff --git a/sql/field.h b/sql/field.h
index dfc02149f9d..c60c5e3bece 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -869,6 +869,18 @@ public:
*/
Column_statistics_collected *collected_stats;
+ enum stats_availability // bit positions within stats_available
+ {
+ STATISTICS_CACHED=0, // the availability bits below have been computed
+ STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE, // statistics for range predicates exist
+ STATISTICS_FOR_NDV_AVAILABLE // number of distinct values is known
+ };
+
+ /*
+ Bitmap caching whether statistics are available for a field or not.
+ Bit N of the value corresponds to enumerator N of stats_availability.
+ */
+ uint stats_available;
+
/*
This is additional data provided for any computed(virtual) field,
default function or check constraint.
@@ -1907,6 +1919,16 @@ public:
/* Mark field in read map. Updates also virtual fields */
void register_field_in_read_map();
+ bool is_first_component_of_key(KEY *key);
+ void statistics_available_via_keys();
+ void statistics_available_via_stat_tables();
+ void is_statistics_available();
+ bool is_statistics_available_for_range_predicates();
+ bool is_ndv_available();
+ bool is_ndv_available_via_stat_tables();
+ bool is_ndv_available_via_keys();
+ bool is_eits_usable();
+
virtual Compression_method *compression_method() const { return 0; }
virtual Virtual_tmp_table **virtual_tmp_table_addr()
diff --git a/sql/item.cc b/sql/item.cc
index 52274380cd1..f80db684bb3 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -7491,6 +7491,122 @@ Item *Item::build_pushable_cond(THD *thd,
}
+/*
+ @brief
+ Check if selectivity estimates are accurate for a conditional formula
+
+ @details
+ This function checks whether this item belongs to a certain class of
+ condition for which we can calculate an accurate selectivity estimate.
+
+ The definition of the class of condition is recursive.
+ 1. Simple formula
+ a. Formula in the form of range predicates:
+
+ The predicate would be of type:
+ col op const
+ where op can be
+
+ op:
+ | >
+ | >=
+ | <
+ | <=
+ | =
+ | <>
+ Also the other cases are with
+ [NOT] IN predicate,
+ [NOT] NULL predicate and
+ LIKE predicate.
+ The predicate should have only one non-constant argument and
+ this argument will be a reference to a column that is used either
+ as the first component of an index or statistics are available via
+ statistical tables.
+
+ b. Equalities:
+ For an equality to have accurate selectivity estimates,
+ the number of distinct values for each column in the equality
+ needs to be known.
+ Eg: t1.a= t2.a is transformed to MULTIPLE_EQUAL(t1.a, t2.a)
+ For this case we need to make sure we know number of distinct
+ values for t1.a and t2.a
+
+ The number of distinct values for a column can be known by
+ 1) from indexes via rec_per_key
+ 2) from statistical tables via avg_frequency.
+
+ 2. AND / OR formula over formulas defined in section 1 of the definition.
+
+ a) AND Formula
+ For AND formula the check for accurate selectivity estimates depends
+ whether or not the AND formula is at the top level.
+
+ i) Top level
+ For an AND formula at the top level, we need to check if
+ accurate estimates are available for all the predicates
+ inside an AND formula.
+ If this is true then accurate selectivity estimates are available
+ for the AND formula.
+
+ Eg: t1.a > 10 and t2.a < 5
+
+ if we have accurate selectivity estimates
+ for t1.a > 10 and t2.a < 5 via indexes or statistical tables,
+ then selectivity estimates for this AND formula are accurate
+
+      ii) Non-top level
+          Accurate selectivity estimates are needed for all the
+          predicates inside the AND formula, and each predicate
+          needs to be resolved by one column (table column).
+          If this scenario is satisfied, then accurate selectivity
+          estimates are available for the AND formula.
+ Eg: t1.a = t2.a AND ( (t1.a > 5 AND t2.a < 10) OR t1.a <= 0)
+
+ b) OR Formula
+
+ For an OR predicate, we need to make sure that the
+ whole OR predicate can be resolved by one column
+ directly or indirectly (that is via multiple equalities).
+ If this is possible then for the resolved column we need to have
+ statistics either from the first component of an index or
+ via statistical tables.
+
+ Eg: t1.a=t2.b and (t2.b > 5 or t1.a < 0);
+
+ In the end for all fields we may have selectivity from an index or
+ statistical tables.
+
+  @notes
+    The implementation of this function uses the 'walk' method to traverse
+    the tree of this item with predicate_selectivity_checker() as the
+    call-back parameter of the method.
+
+
+ @retval
+ TRUE selectivity estimates are accurate
+ FALSE OTHERWISE
+*/
+
+bool Item::with_accurate_selectivity_estimation()
+{
+  const bool is_and_formula=
+    type() == Item::COND_ITEM &&
+    ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC;
+
+  if (!is_and_formula)
+  {
+    /* Anything but an AND formula is checked as one unit */
+    SAME_FIELD whole_cond= {NULL, false};
+    return !walk(&Item::predicate_selectivity_checker, 0, &whole_cond);
+  }
+
+  /*
+    AND formula: every conjunct is checked on its own, starting with a
+    fresh SAME_FIELD so that conjuncts may depend on different columns.
+  */
+  List_iterator<Item> conjunct_it(*((Item_cond*) this)->argument_list());
+  for (Item *conjunct= conjunct_it++; conjunct; conjunct= conjunct_it++)
+  {
+    SAME_FIELD per_conjunct= {NULL, false};
+    if (conjunct->walk(&Item::predicate_selectivity_checker, 0,
+                       &per_conjunct))
+      return false;
+  }
+  return true;
+}
+
+
static
Item *get_field_item_for_having(THD *thd, Item *item, st_select_lex *sel)
{
@@ -9217,6 +9333,103 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel)
}
+/*
+  @brief
+    Checks if a formula of a condition depends on a single column
+
+  @details
+    The function checks whether a formula of a condition depends
+    (directly, or indirectly through equalities inferred from the
+    conjuncted multiple equalities) on one column only.
+
+    Eg:
+      WHERE clause is:
+        t1.a=t2.b and (t1.a > 5 or t2.b < 1);
+
+      the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of
+      equalities to conclude that it depends on one column.
+
+    This is used mostly for OR conjuncts, where the entire OR conjunct has
+    to contain only one column for the estimates to be accurate.
+
+  @param
+    arg   SAME_FIELD structure shared by all the fields of the formula
+
+  @retval
+    TRUE  : the formula does not depend on one column, or no statistics
+            are available for the first column that was encountered
+    FALSE : otherwise
+*/
+
+bool Item_field::dep_on_one_column(void *arg)
+{
+  SAME_FIELD *ctx= (SAME_FIELD*) arg;
+
+  /*
+    When the traversal of the condition tree starts, the column the
+    predicate will depend on is not known yet. The first field item that
+    is reached is therefore stored in SAME_FIELD::item, and all the field
+    items reached later are compared with it.
+  */
+  if (!ctx->item)
+  {
+    ctx->item= this;
+    bool has_stats= field->is_statistics_available_for_range_predicates();
+    if (!has_stats && item_equal)
+      has_stats= item_equal->is_statistics_available_for_range_predicates();
+    ctx->is_stats_available= has_stats;
+    return !has_stats;
+  }
+
+  /* The very same column was reached again */
+  DBUG_ASSERT(ctx->item->real_item()->type() == Item::FIELD_ITEM);
+  Item_field *first_field= (Item_field*) ctx->item->real_item();
+  if (first_field->field == field)
+    return false;
+
+  /* A different column is fine only within the same multiple equality */
+  Item_equal *first_equal= ctx->item->get_item_equal();
+  return !first_equal || first_equal != item_equal;
+}
+
+
+/*
+  Variant of the single-column dependency check for a view reference:
+  the comparison is done on the field item this reference resolves to,
+  while the multiple equality considered is the one of the reference.
+
+  TRUE  : the formula does not depend on one column, or no statistics
+          are available for the first column that was encountered
+  FALSE : otherwise
+*/
+bool Item_direct_view_ref::dep_on_one_column(void *arg)
+{
+  SAME_FIELD *ctx= (SAME_FIELD*) arg;
+  DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM);
+  Item_field *underlying= (Item_field*) real_item();
+
+  /*
+    When the traversal of the condition tree starts, the column the
+    predicate will depend on is not known yet. The first field item that
+    is reached is therefore stored in SAME_FIELD::item, and all the field
+    items reached later are compared with it.
+  */
+  if (!ctx->item)
+  {
+    ctx->item= this;
+    bool has_stats=
+      underlying->field->is_statistics_available_for_range_predicates();
+    if (!has_stats && item_equal)
+      has_stats= item_equal->is_statistics_available_for_range_predicates();
+    ctx->is_stats_available= has_stats;
+    return !has_stats;
+  }
+
+  /* The very same column was reached again */
+  DBUG_ASSERT(ctx->item->real_item()->type() == Item::FIELD_ITEM);
+  Item_field *first_field= (Item_field*) ctx->item->real_item();
+  if (first_field->field == underlying->field)
+    return false;
+
+  /* A different column is fine only within the same multiple equality */
+  Item_equal *first_equal= ctx->item->get_item_equal();
+  return !first_equal || first_equal != item_equal;
+}
+
+
bool Item_direct_view_ref::excl_dep_on_table(table_map tab_map)
{
table_map used= used_tables();
diff --git a/sql/item.h b/sql/item.h
index fb480b4c578..8a6b16ad82d 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -446,6 +446,26 @@ typedef struct replace_equal_field_arg
struct st_join_table *context_tab;
} REPLACE_EQUAL_FIELD_ARG;
+
+/*
+ Structure storing information about the field on which an entire
+ predicate depends (directly or indirectly via equalities).
+ It is initialized as {NULL, false} before a predicate is traversed.
+*/
+typedef struct same_field
+{
+ /*
+ Field item of the first column encountered while traversing
+ the conditional predicate; NULL until a column has been encountered
+ */
+ Item *item;
+ /*
+ Set to true if the statistics for the field are available
+ directly (via keys or stat tables) or indirectly (via equalities)
+ */
+ bool is_stats_available;
+}SAME_FIELD;
+
+
class Settable_routine_parameter
{
public:
@@ -1968,7 +1988,25 @@ public:
virtual bool count_sargable_conds(void *arg) { return 0; }
virtual bool limit_index_condition_pushdown_processor(void *arg) { return 0; }
virtual bool exists2in_processor(void *arg) { return 0; }
+
virtual bool find_selective_predicates_list_processor(void *arg) { return 0; }
+
+ bool with_accurate_selectivity_estimation();
+
+ /*
+ @brief
+ Check if selectivity of a predicate is available via indexes or EITS
+
+ @details
+ Used as the processor passed to walk() by
+ with_accurate_selectivity_estimation(), which reports the condition
+ as accurately estimable only when the walk returns FALSE.
+
+ @param
+ arg SAME_FIELD structure storing information whether the AND/OR
+ conjunct can be resolved via a single column.
+
+ @retval
+ FALSE : SUCCESS (nothing prevents an accurate estimate)
+ TRUE : OTHERWISE
+ */
+ virtual bool predicate_selectivity_checker(void *arg) { return FALSE; }
+ virtual bool dep_on_one_column(void *arg) { return true; } // default: not resolvable to one column
bool cleanup_is_expensive_cache_processor(void *arg)
{
is_expensive_cache= (int8)(-1);
@@ -3576,6 +3614,7 @@ public:
return field->table->pos_in_table_list->outer_join;
}
bool check_index_dependence(void *arg);
+ bool dep_on_one_column(void *arg);
friend class Item_default_value;
friend class Item_insert_value;
friend class st_select_lex_unit;
@@ -5946,6 +5985,7 @@ public:
Item *field_transformer_for_having_pushdown(THD *thd, uchar *arg)
{ return this; }
Item *remove_item_direct_ref() { return this; }
+ bool dep_on_one_column(void *arg);
};
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 83eb605f463..8084af72f60 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -2106,6 +2106,31 @@ bool Item_func_between::count_sargable_conds(void *arg)
return 0;
}
+/*
+  Selectivity check for BETWEEN.
+
+  When the first argument is a field, both bounds have to form a range
+  predicate with it and the field has to pass dep_on_one_column().
+  Otherwise every bound that is a field has to form a range predicate
+  with the first argument and pass dep_on_one_column().
+
+  FALSE : the check is passed
+  TRUE  : otherwise
+*/
+bool Item_func_between::predicate_selectivity_checker(void *arg)
+{
+  if (arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
+  {
+    const bool both_bounds_fit= is_range_predicate(args[0], args[1]) &&
+                                is_range_predicate(args[0], args[2]);
+    if (!both_bounds_fit)
+      return true;
+    return args[0]->dep_on_one_column(arg);
+  }
+
+  for (uint idx= 1; idx < arg_count; idx++)
+  {
+    if (arguments()[idx]->real_item()->type() != Item::FIELD_ITEM)
+      continue;
+    if (!is_range_predicate(args[idx], args[0]) ||
+        args[idx]->dep_on_one_column(arg))
+      return true;
+  }
+  return false;
+}
+
void Item_func_between::fix_after_pullout(st_select_lex *new_parent,
Item **ref, bool merge)
@@ -4290,6 +4315,16 @@ bool Item_func_in::count_sargable_conds(void *arg)
}
+/*
+  Selectivity check for [NOT] IN: all the list elements have to be
+  constants and the key item has to pass dep_on_one_column().
+
+  FALSE : the check is passed
+  TRUE  : otherwise
+*/
+bool Item_func_in::predicate_selectivity_checker(void *arg)
+{
+  return !all_items_are_consts(args + 1, arg_count - 1) ||
+         key_item()->dep_on_one_column(arg);
+}
+
+
bool Item_func_in::list_contains_null()
{
Item **arg,**arg_end;
@@ -5516,6 +5551,16 @@ bool Item_func_null_predicate::count_sargable_conds(void *arg)
}
+/*
+  Selectivity check for IS [NOT] NULL: the argument has to be accepted
+  by is_range_predicate() (there is no value to compare with) and has to
+  pass dep_on_one_column().
+
+  FALSE : the check is passed
+  TRUE  : otherwise
+*/
+bool Item_func_null_predicate::predicate_selectivity_checker(void *arg)
+{
+  if (!is_range_predicate(args[0], NULL))
+    return true;
+  return args[0]->dep_on_one_column(arg);
+}
+
+
longlong Item_func_isnull::val_int()
{
DBUG_ASSERT(fixed == 1);
@@ -5597,6 +5642,17 @@ bool Item_bool_func2::count_sargable_conds(void *arg)
return 0;
}
+
+/*
+  Selectivity check for a binary comparison: the first argument that
+  forms a range predicate with the other one (the left argument is tried
+  first) has to pass dep_on_one_column().
+
+  FALSE : the check is passed
+  TRUE  : otherwise, including when neither argument qualifies
+*/
+bool Item_bool_func2::predicate_selectivity_checker(void *arg)
+{
+  Item *left= args[0];
+  Item *right= args[1];
+  if (is_range_predicate(left, right))
+    return left->dep_on_one_column(arg);
+  if (is_range_predicate(right, left))
+    return right->dep_on_one_column(arg);
+  return true;
+}
+
+
void Item_func_like::print(String *str, enum_query_type query_type)
{
args[0]->print_parenthesised(str, query_type, precedence());
@@ -5696,8 +5752,18 @@ SEL_TREE *Item_func_like::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
bool sargable_pattern= with_sargable_pattern();
param->thd->mem_root= tmp_root;
return sargable_pattern ?
- Item_bool_func2::get_mm_tree(param, cond_ptr) :
- Item_func::get_mm_tree(param, cond_ptr);
+ Item_bool_func2::get_mm_tree(param, cond_ptr) :
+ Item_func::get_mm_tree(param, cond_ptr);
+}
+
+
+/*
+  Selectivity check for LIKE: the pattern has to be sargable and the
+  first argument has to pass dep_on_one_column().
+
+  FALSE : the check is passed
+  TRUE  : otherwise
+*/
+bool Item_func_like::predicate_selectivity_checker(void *arg)
+{
+  if (!with_sargable_pattern())
+    return true;
+  return args[0]->dep_on_one_column(arg);
}
@@ -7139,6 +7205,49 @@ bool Item_equal::count_sargable_conds(void *arg)
}
+/*
+  Selectivity check for a multiple equality.
+
+  For equality conditions like tbl1.col = tbl2.col it is required that
+  the number of distinct values (ndv) is available for every field of
+  the multiple equality. After that only the first item of the equality
+  takes part in the single-column check: when a column was already
+  recorded in the SAME_FIELD structure the item is compared with it,
+  otherwise the item gets recorded and its result is not used.
+
+  FALSE : the check is passed
+  TRUE  : otherwise
+*/
+bool Item_equal::predicate_selectivity_checker(void *arg)
+{
+  Item_equal_fields_iterator fields_it(*this);
+  while (fields_it++)
+  {
+    Field *member= fields_it.get_curr_field();
+    if (!member->is_ndv_available())
+      return true;
+  }
+
+  fields_it.rewind();
+  Item *first_item= fields_it++;
+  if (!first_item)
+    return false;
+
+  SAME_FIELD *ctx= (SAME_FIELD *) arg;
+  if (ctx->item)
+    return first_item->dep_on_one_column(arg);
+
+  /* Only records the column in ctx; the outcome is deliberately unused */
+  first_item->dep_on_one_column(arg);
+  return false;
+}
+
+
+/*
+  Check whether statistics for range predicates are available for at
+  least one field of the multiple equality.
+
+  Every field of the equality is visited, even after a match was found.
+
+  TRUE  : at least one field has such statistics
+  FALSE : otherwise
+*/
+bool Item_equal::is_statistics_available_for_range_predicates()
+{
+  bool any_available= false;
+  Item_equal_fields_iterator fields_it(*this);
+  while (fields_it++)
+  {
+    Field *member= fields_it.get_curr_field();
+    /* '|=' rather than '||': the call is made for every field */
+    any_available|= member->is_statistics_available_for_range_predicates();
+  }
+  return any_available;
+}
+
+
/**
@brief
Evaluate multiple equality
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index fa715badfc7..f467efd0e10 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -227,6 +227,7 @@ public:
bool fix_length_and_dec() { decimals=0; max_length=1; return FALSE; }
uint decimal_precision() const { return 1; }
bool need_parentheses_in_default() { return true; }
+ bool predicate_selectivity_checker(void *arg) { return TRUE; } // always fails the selectivity check
};
@@ -418,6 +419,7 @@ public:
COND *remove_eq_conds(THD *thd, Item::cond_result *cond_value,
bool top_level);
bool count_sargable_conds(void *arg);
+ bool predicate_selectivity_checker(void *arg);
/*
Specifies which result type the function uses to compare its arguments.
This method is used in equal field propagation.
@@ -936,6 +938,7 @@ public:
bool find_not_null_fields(table_map allowed);
void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge);
bool count_sargable_conds(void *arg);
+ bool predicate_selectivity_checker(void *arg);
void add_key_fields(JOIN *join, KEY_FIELD **key_fields,
uint *and_level, table_map usable_tables,
SARGABLE_PARAM **sargables);
@@ -2473,6 +2476,7 @@ public:
bool find_not_null_fields(table_map allowed);
void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge);
bool count_sargable_conds(void *arg);
+ bool predicate_selectivity_checker(void *arg);
Item *get_copy(THD *thd)
{ return get_item_copy<Item_func_in>(thd, this); }
Item *build_clone(THD *thd)
@@ -2570,6 +2574,7 @@ public:
return FALSE;
}
bool count_sargable_conds(void *arg);
+ bool predicate_selectivity_checker(void *arg);
};
@@ -2813,6 +2818,7 @@ public:
Item *get_copy(THD *thd)
{ return get_item_copy<Item_func_like>(thd, this); }
+ bool predicate_selectivity_checker(void *arg);
};
@@ -3220,6 +3226,8 @@ public:
uint elements_count() { return equal_items.elements; }
friend class Item_equal_fields_iterator;
bool count_sargable_conds(void *arg);
+ bool predicate_selectivity_checker(void *arg);
+ bool is_statistics_available_for_range_predicates();
Item *multiple_equality_transformer(THD *thd, uchar *arg);
friend class Item_equal_iterator<List_iterator_fast,Item>;
friend class Item_equal_iterator<List_iterator,Item>;
@@ -3371,6 +3379,7 @@ public:
SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr);
Item *get_copy(THD *thd)
{ return get_item_copy<Item_cond_and>(thd, this); }
+ bool predicate_selectivity_checker(void *arg) { return FALSE; } // the AND node itself never fails the check
};
inline bool is_cond_and(Item *item)
@@ -3395,6 +3404,7 @@ public:
Item *neg_transformer(THD *thd);
Item *get_copy(THD *thd)
{ return get_item_copy<Item_cond_or>(thd, this); }
+ bool predicate_selectivity_checker(void *arg) { return FALSE; } // the OR node itself never fails the check
};
class Item_func_dyncol_check :public Item_bool_func
diff --git a/sql/item_func.h b/sql/item_func.h
index 6a4a9fa5dae..d04cc8643cf 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -399,6 +399,12 @@ public:
Item_func *get_item_func() { return this; }
bool is_simplified_cond_processor(void *arg)
{ return const_item() && !val_int(); }
+  /*
+    A generic function passes the selectivity check (FALSE) only when it
+    is a constant that is not expensive to evaluate.
+  */
+  bool predicate_selectivity_checker(void *arg)
+  {
+    return !const_item() || is_expensive();
+  }
};
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index adc0572cb1c..d3f49097189 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3105,7 +3105,7 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
{
Field *field= *field_ptr;
if (bitmap_is_set(used_fields, field->field_index) &&
- is_eits_usable(field))
+ field->is_eits_usable())
parts++;
}
@@ -3126,7 +3126,7 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
Field *field= *field_ptr;
if (bitmap_is_set(used_fields, field->field_index))
{
- if (!is_eits_usable(field))
+ if (!field->is_eits_usable())
continue;
uint16 store_length;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index bb9d82a7039..3553ee78c5b 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -8296,6 +8296,11 @@ choose_plan(JOIN *join, table_map join_tables)
jtab_sort_func, (void*)join->emb_sjm_nest);
Json_writer_object wrapper(thd);
+
+ if (join->conds)
+ wrapper.add("cardinality_accurate",
+ join->conds->with_accurate_selectivity_estimation());
+
Json_writer_array trace_plan(thd,"considered_execution_plans");
if (!join->emb_sjm_nest)
@@ -29449,6 +29454,38 @@ void unpack_to_base_table_fields(TABLE *table)
}
+
+/*
+  @brief
+    Checks if a predicate is a range predicate with a constant part
+
+  @param
+    item    the item referring to the field of the table
+    value   the item referring to the expression the field is compared
+            with; NULL for predicates that have no such expression
+            (e.g. IS [NOT] NULL)
+
+  @details
+    Range predicate is defined as the form of field op const
+    where op can be operators like </<=/=/>/>=/BETWEEN etc.
+
+    Only the shape of the predicate is checked here: 'item' has to
+    resolve to a non-constant field and 'value', when given, has to be a
+    constant that is not expensive to evaluate. A comparison of two
+    non-constant columns (t1.a < t2.b) is therefore not a range predicate.
+
+    Whether statistics are available for the field is not checked by
+    this function; the callers use dep_on_one_column() for that.
+
+  @retval
+    TRUE  : the predicate is a range predicate
+    FALSE : Otherwise
+*/
+
+bool is_range_predicate(Item *item, Item *value)
+{
+  Item *field= item->real_item();
+  if (field->type() != Item::FIELD_ITEM || field->const_item())
+    return false;
+  /* 'field op const': the compared expression must be a cheap constant */
+  return !value || (value->const_item() && !value->is_expensive());
+}
+
+
/**
@} (end of group Query_Optimizer)
*/
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 2aba63ddc79..4d5a6e0abf5 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -2531,5 +2531,6 @@ void propagate_new_equalities(THD *thd, Item *cond,
List<Item_equal> *new_equalities,
COND_EQUAL *inherited,
bool *is_simplifiable_cond);
+bool is_range_predicate(Item *item, Item *value);
#endif /* SQL_SELECT_INCLUDED */
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 2636299e330..71d472feaba 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -31,7 +31,6 @@
#include "opt_range.h"
#include "uniques.h"
#include "sql_show.h"
-#include "sql_partition.h"
/*
The system variable 'use_stat_tables' can take one of the
@@ -3159,7 +3158,10 @@ static void dump_stats_from_share_to_table(TABLE *table)
Field **field_ptr= table_share->field;
Field **table_field_ptr= table->field;
for ( ; *field_ptr; field_ptr++, table_field_ptr++)
+ {
(*table_field_ptr)->read_stats= (*field_ptr)->read_stats;
+ (*table_field_ptr)->stats_available= (*field_ptr)->stats_available;
+ }
table->stats_is_read= true;
}
@@ -3978,37 +3980,3 @@ bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table)
}
return false;
}
-
-/*
- Check wheter we can use EITS statistics for a field or not
-
- TRUE : Use EITS for the columns
- FALSE: Otherwise
-*/
-
-bool is_eits_usable(Field *field)
-{
- Column_statistics* col_stats= field->read_stats;
-
- // check if column_statistics was allocated for this field
- if (!col_stats)
- return false;
-
- DBUG_ASSERT(field->table->stats_is_read);
-
- /*
- (1): checks if we have EITS statistics for a particular column
- (2): Don't use EITS for GEOMETRY columns
- (3): Disabling reading EITS statistics for columns involved in the
- partition list of a table. We assume the selecticivity for
- such columns would be handled during partition pruning.
- */
-
- return !col_stats->no_stat_values_provided() && //(1)
- field->type() != MYSQL_TYPE_GEOMETRY && //(2)
-#ifdef WITH_PARTITION_STORAGE_ENGINE
- (!field->table->part_info ||
- !field->table->part_info->field_in_partition_expr(field)) && //(3)
-#endif
- true;
-}
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 20ecf06bfee..34471fd5270 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -138,7 +138,6 @@ double get_column_range_cardinality(Field *field,
key_range *max_endp,
uint range_flag);
bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table);
-bool is_eits_usable(Field* field);
class Histogram
{
diff --git a/sql/table.cc b/sql/table.cc
index c48a6fed89a..02f4ece9c40 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -5435,6 +5435,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
{
(*f_ptr)->next_equal_field= NULL;
(*f_ptr)->cond_selectivity= 1.0;
+ (*f_ptr)->stats_available= 0;
}
notnull_cond= 0;