diff options
Diffstat (limited to 'sql')
-rw-r--r-- | sql/field.cc | 200 | ||||
-rw-r--r-- | sql/field.h | 22 | ||||
-rw-r--r-- | sql/item.cc | 213 | ||||
-rw-r--r-- | sql/item.h | 40 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 113 | ||||
-rw-r--r-- | sql/item_cmpfunc.h | 10 | ||||
-rw-r--r-- | sql/item_func.h | 6 | ||||
-rw-r--r-- | sql/opt_range.cc | 4 | ||||
-rw-r--r-- | sql/sql_select.cc | 37 | ||||
-rw-r--r-- | sql/sql_select.h | 1 | ||||
-rw-r--r-- | sql/sql_statistics.cc | 38 | ||||
-rw-r--r-- | sql/sql_statistics.h | 1 | ||||
-rw-r--r-- | sql/table.cc | 1 |
13 files changed, 646 insertions, 40 deletions
diff --git a/sql/field.cc b/sql/field.cc index fe3aebce05d..91dc1deb360 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -36,6 +36,8 @@ #include "tztime.h" // struct Time_zone #include "filesort.h" // change_double_for_sort #include "log_event.h" // class Table_map_log_event +#include "sql_statistics.h" +#include "sql_partition.h" #include <m_ctype.h> // Maximum allowed exponent value for converting string to decimal @@ -1851,6 +1853,7 @@ Field::Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, field_index= 0; cond_selectivity= 1.0; next_equal_field= NULL; + stats_available= 0; } @@ -11358,6 +11361,203 @@ void Field::print_key_value_binary(String *out, const uchar* key, uint32 length) } +/* + @brief + Check if statistics for a column are available via keys + + @details + If the column is the first component of a key, then statistics + for the column are available from the range optimizer. + Sets the bit in Field::stats_table + a) NDV is available + b) Statistics are available for the non-const argument of a + range predicate +*/ + +void Field::statistics_available_via_keys() +{ + uint key; + key_map::Iterator it(key_start); + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + KEY *keyinfo= table->key_info + key; + if (keyinfo->usable_key_parts == 1 && + field_index + 1 == keyinfo->key_part->fieldnr) + { + stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE); + return; + } + } +} + + +/* + @brief + Check if statistics for a column are available via stat tables +*/ + +void Field::statistics_available_via_stat_tables() +{ + THD *thd= table->in_use; + if (!(check_eits_preferred(thd) && + thd->variables.optimizer_use_condition_selectivity > 2)) + return; + if (!(table->stats_is_read && + read_stats && !read_stats->no_stat_values_provided())) + return; + stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE); + if (!read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY)) + stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); +} + + +/* + @brief + Check if statistics for a column are available via indexes or stat tables + + @retval + TRUE : statistics available for the column + FALSE : OTHERWISE +*/ + +bool Field::is_statistics_available_for_range_predicates() +{ + if (!(stats_available & (1 << STATISTICS_CACHED))) + { + is_statistics_available(); + stats_available|= (1 << STATISTICS_CACHED); + } + return (stats_available & (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE)); +} + + +void Field::is_statistics_available() +{ + statistics_available_via_keys(); + statistics_available_via_stat_tables(); + is_ndv_available_via_keys(); + is_ndv_available_via_stat_tables(); +} + +/* + @brief + Check if ndv for a column are available via indexes or stat tables + + @retval + TRUE : ndv available for the column + FALSE : OTHERWISE +*/ + +bool Field::is_ndv_available() +{ + if (!(stats_available & (1 << STATISTICS_CACHED))) + { + is_statistics_available(); + stats_available|= (1 << STATISTICS_CACHED); + } + return (stats_available & (1 << STATISTICS_FOR_NDV_AVAILABLE)); +} + + +/* + @brief + Check if number of distinct values(ndv) for a column are available via keys + + @retval + TRUE : ndv available from keys + FALSE : otherwise +*/ + +bool Field::is_ndv_available_via_keys() +{ + uint key; + key_map::Iterator it(key_start); + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + KEY *keyinfo= table->key_info + key; + if (is_first_component_of_key(keyinfo) && keyinfo->actual_rec_per_key(0)) + { + stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); + return true; + } + } + return false; +} + + +/* + @brief + Check if ndv for a column are available via statistical tables + + @retval + TRUE : ndv available from statistical tables + FALSE : otherwise +*/ + +bool Field::is_ndv_available_via_stat_tables() +{ + if (!check_eits_preferred(table->in_use)) + return false; + if (!(read_stats && !read_stats->no_stat_values_provided() && + !read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY))) + return false; + stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE); + return true; +} + + +/* + @brief + Checks if a field is the first component of a given key + + @param + key given key + + @retval + TRUE : field is the first component of the given key + FALSE : otherwise +*/ + +bool Field::is_first_component_of_key(KEY *key) +{ + DBUG_ASSERT(key->usable_key_parts >= 1); + return field_index + 1 == key->key_part->fieldnr; +} + + +/* + Check whether EITS statistics for a field are usable or not + + TRUE : Use EITS for the columns + FALSE: Otherwise +*/ + +bool Field::is_eits_usable() +{ + // check if column_statistics was allocated for this field + if (!read_stats) + return false; + + DBUG_ASSERT(table->stats_is_read); + + /* + (1): checks if we have EITS statistics for a particular column + (2): Don't use EITS for GEOMETRY columns + (3): Disabling reading EITS statistics for columns involved in the + partition list of a table. We assume the selectivity for + such columns would be handled during partition pruning. + */ + + return !read_stats->no_stat_values_provided() && //(1) + type() != MYSQL_TYPE_GEOMETRY && //(2) +#ifdef WITH_PARTITION_STORAGE_ENGINE + (!table->part_info || + !table->part_info->field_in_partition_expr(this)) && //(3) +#endif + true; +} + + Virtual_column_info* Virtual_column_info::clone(THD *thd) { Virtual_column_info* dst= new (thd->mem_root) Virtual_column_info(*this); diff --git a/sql/field.h b/sql/field.h index dfc02149f9d..c60c5e3bece 100644 --- a/sql/field.h +++ b/sql/field.h @@ -869,6 +869,18 @@ public: */ Column_statistics_collected *collected_stats; + enum stats_availability + { + STATISTICS_CACHED=0, + STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE, + STATISTICS_FOR_NDV_AVAILABLE + }; + + /* + Caches the value of whether statistics are available for a field or not. + */ + uint stats_available; + /* This is additional data provided for any computed(virtual) field, default function or check constraint. @@ -1907,6 +1919,16 @@ public: /* Mark field in read map. Updates also virtual fields */ void register_field_in_read_map(); + bool is_first_component_of_key(KEY *key); + void statistics_available_via_keys(); + void statistics_available_via_stat_tables(); + void is_statistics_available(); + bool is_statistics_available_for_range_predicates(); + bool is_ndv_available(); + bool is_ndv_available_via_stat_tables(); + bool is_ndv_available_via_keys(); + bool is_eits_usable(); + virtual Compression_method *compression_method() const { return 0; } virtual Virtual_tmp_table **virtual_tmp_table_addr() diff --git a/sql/item.cc b/sql/item.cc index 52274380cd1..f80db684bb3 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -7491,6 +7491,122 @@ Item *Item::build_pushable_cond(THD *thd, } +/* + @brief + Check if selectivity estimates are accurate for a conditional formula + + @details + This function checks whether this item belongs to a certain class of + condition for which we can calculate an accurate selectivity estimate. + + The definition of the class of condition is recursive. + 1. Simple formula + a. Formula in the form of range predicates: + + The predicate would be of type: + col op const + where op can be + + op: + | > + | >= + | < + | <= + | = + | <> + Also the other cases are with + [NOT] IN predicate, + [NOT] NULL predicate and + LIKE predicate. + The predicate should have only one non-constant argument and + this argument will be a reference to a column that is used either + as the first component of an index or statistics are available via + statistical tables. + + b. Equalities: + For an equality to have accurate selectivity estimates, + the number of distinct values for each column in the equality + needs to be known. + Eg: t1.a= t2.a is transformed to MULTIPLE_EQUAL(t1.a, t2.a) + For this case we need to make sure we know number of distinct + values for t1.a and t2.a + + The number of distinct values for a column can be known by + 1) from indexes via rec_per_key + 2) from statistical tables via avg_frequency. + + 2. AND / OR formula over formulas defined in section 1 of the definition. + + a) AND Formula + For AND formula the check for accurate selectivity estimates depends + whether or not the AND formula is at the top level. + + i) Top level + For an AND formula at the top level, we need to check if + accurate estimates are available for all the predicates + inside an AND formula. + If this is true then accurate selectivity estimates are available + for the AND formula. + + Eg: t1.a > 10 and t2.a < 5 + + if we have accurate selectivity estimates + for t1.a > 10 and t2.a < 5 via indexes or statistical tables, + then selectivity estimates for this AND formula are accurate + + ii) Non-top level + For all the predicates inside an AND formula + accurate selectivity estimates are needed + and each predicate need to be resolved by one + column (table column). If this scenario is satisfied then + accurate selectivity estimates is available for the AND formula. + Eg: t1.a = t2.a AND ( (t1.a > 5 AND t2.a < 10) OR t1.a <= 0) + + b) OR Formula + + For an OR predicate, we need to make sure that the + whole OR predicate can be resolved by one column + directly or indirectly (that is via multiple equalities). + If this is possible then for the resolved column we need to have + statistics either from the first component of an index or + via statistical tables. + + Eg: t1.a=t2.b and (t2.b > 5 or t1.a < 0); + + In the end for all fields we may have selectivity from an index or + statistical tables. + + @notes + The implementation for this function use the 'walk' method to traverse + the tree of this item with predicate_selectivity_checker() as the + call-back parameter of the method. + + + @retval + TRUE selectivity estimates are accurate + FALSE OTHERWISE +*/ + +bool Item::with_accurate_selectivity_estimation() +{ + if (type() == Item::COND_ITEM && + ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC) + { + List_iterator<Item> li(*((Item_cond*) this)->argument_list()); + Item *item; + while ((item= li++)) + { + SAME_FIELD arg= {NULL, false}; + if (item->walk(&Item::predicate_selectivity_checker, 0, &arg)) + return false; + } + return true; + } + SAME_FIELD arg= {NULL, false}; + return !walk(&Item::predicate_selectivity_checker, 0, &arg); +} + + static Item *get_field_item_for_having(THD *thd, Item *item, st_select_lex *sel) { @@ -9217,6 +9333,103 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel) } +/* + @brief + Checks if a formula of a condition contains the same column + + @details + In the function we try to check if a formula of a condition depends + (directly or indirectly through equalities inferred from the + conjuncted multiple equalities) only on one column. + + Eg: + WHERE clause is: + t1.a=t2.b and (t1.a > 5 or t2.b < 1); + + the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of + equalities to conclude that it depends on one column. + + This is used mostly for OR conjuncts where we need to make sure + that the entire OR conjunct contains only one column, so that we may + get accurate estimates. + + @retval + TRUE : the formula does not depend on one column + FALSE : OTHERWISE +*/ + +bool Item_field::dep_on_one_column(void *arg) +{ + SAME_FIELD *same_field_arg= (SAME_FIELD*)arg; + + /* + The same_field_arg is passed as a parameter because when we start walking + over the condition tree we don't know which column the predicate will be + dependent on. So as soon as we encounter a leaf of the condition tree + which is a field item, we set the SAME_FIELD::item to the found + field item and then compare the rest of the fields in the predicate with + the field item. + */ + + if (same_field_arg->item == NULL) + { + same_field_arg->item= this; + same_field_arg->is_stats_available= + field->is_statistics_available_for_range_predicates() || + (item_equal && + item_equal->is_statistics_available_for_range_predicates()); + return !same_field_arg->is_stats_available; + } + + /* Found the same field while traversing the condition tree */ + DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM); + if (((Item_field*)same_field_arg->item->real_item())->field == field) + return false; + + if (!same_field_arg->item->get_item_equal()) + return true; + + return !(same_field_arg->item->get_item_equal() == item_equal); +} + + +bool Item_direct_view_ref::dep_on_one_column(void *arg) +{ + SAME_FIELD *same_field_arg= (SAME_FIELD*)arg; + DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM); + Item_field *field_item= (Item_field*)real_item(); + + /* + The same_field_arg is passed as a parameter because when we start walking + over the condition tree we don't know which column the predicate will be + dependent on. So as soon as we encounter a leaf of the condition tree + which is a field item, we set the SAME_FIELD::item to the found + field item and then compare the rest of the fields in the predicate with + the field item. + */ + + if (same_field_arg->item == NULL) + { + same_field_arg->item= this; + same_field_arg->is_stats_available= + field_item->field->is_statistics_available_for_range_predicates() || + (item_equal && + item_equal->is_statistics_available_for_range_predicates()); + return !same_field_arg->is_stats_available; + } + + /* Found the same field while traversing the condition tree */ + DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM); + if (((Item_field*)same_field_arg->item->real_item())->field == field_item->field) + return false; + + if (!same_field_arg->item->get_item_equal()) + return true; + + return !(same_field_arg->item->get_item_equal() == item_equal); +} + + bool Item_direct_view_ref::excl_dep_on_table(table_map tab_map) { table_map used= used_tables(); diff --git a/sql/item.h b/sql/item.h index fb480b4c578..8a6b16ad82d 100644 --- a/sql/item.h +++ b/sql/item.h @@ -446,6 +446,26 @@ typedef struct replace_equal_field_arg struct st_join_table *context_tab; } REPLACE_EQUAL_FIELD_ARG; + +/* + Structure storing information for a field on which the entire predicate is + dependent on (directly or indirectly via equalities) +*/ +typedef struct same_field +{ + /* + field item for the first encountered column while traversing + over the conditional predicate + */ + Item *item; + /* + Set to true if the statistics for the field are available + directly (via keys or stat tables) or indirectly (via equalities) + */ + bool is_stats_available; +}SAME_FIELD; + + class Settable_routine_parameter { public: @@ -1968,7 +1988,25 @@ public: virtual bool count_sargable_conds(void *arg) { return 0; } virtual bool limit_index_condition_pushdown_processor(void *arg) { return 0; } virtual bool exists2in_processor(void *arg) { return 0; } + virtual bool find_selective_predicates_list_processor(void *arg) { return 0; } + + bool with_accurate_selectivity_estimation(); + + /* + @brief + Check if selectivity of a predicate is available via indexes or EITS + + @param + arg Structure storing information whether the AND/OR conjunct + can be resolved via a single column. + + @retval + FALSE : SUCCESS + TRUE : OTHERWISE + */ + virtual bool predicate_selectivity_checker(void *arg) { return FALSE; } + virtual bool dep_on_one_column(void *arg) { return true; } bool cleanup_is_expensive_cache_processor(void *arg) { is_expensive_cache= (int8)(-1); @@ -3576,6 +3614,7 @@ public: return field->table->pos_in_table_list->outer_join; } bool check_index_dependence(void *arg); + bool dep_on_one_column(void *arg); friend class Item_default_value; friend class Item_insert_value; friend class st_select_lex_unit; @@ -5946,6 +5985,7 @@ public: Item *field_transformer_for_having_pushdown(THD *thd, uchar *arg) { return this; } Item *remove_item_direct_ref() { return this; } + bool dep_on_one_column(void *arg); }; diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 83eb605f463..8084af72f60 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -2106,6 +2106,31 @@ bool Item_func_between::count_sargable_conds(void *arg) return 0; } +bool Item_func_between::predicate_selectivity_checker(void *arg) +{ + if (arguments()[0]->real_item()->type() == Item::FIELD_ITEM) + { + if (is_range_predicate(args[0], args[1]) && + is_range_predicate(args[0], args[2])) + { + return args[0]->dep_on_one_column(arg); + } + return true; + } + + for (uint i= 1 ; i < arg_count ; i++) + { + if (arguments()[i]->real_item()->type() == Item::FIELD_ITEM) + { + if (!is_range_predicate(args[i], args[0])) + return true; + if (args[i]->dep_on_one_column(arg)) + return true; + } + } + return false; +} + void Item_func_between::fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge) @@ -4290,6 +4315,16 @@ bool Item_func_in::count_sargable_conds(void *arg) } +bool Item_func_in::predicate_selectivity_checker(void *arg) +{ + if (!all_items_are_consts(args + 1, arg_count - 1)) + return true; + if (key_item()->dep_on_one_column(arg)) + return true; + return false; +} + + bool Item_func_in::list_contains_null() { Item **arg,**arg_end; @@ -5516,6 +5551,16 @@ bool Item_func_null_predicate::count_sargable_conds(void *arg) } +bool Item_func_null_predicate::predicate_selectivity_checker(void *arg) +{ + if (is_range_predicate(args[0], NULL)) + { + return args[0]->dep_on_one_column(arg); + } + return true; +} + + longlong Item_func_isnull::val_int() { DBUG_ASSERT(fixed == 1); @@ -5597,6 +5642,17 @@ bool Item_bool_func2::count_sargable_conds(void *arg) return 0; } + +bool Item_bool_func2::predicate_selectivity_checker(void *arg) +{ + if (is_range_predicate(args[0], args[1])) + return args[0]->dep_on_one_column(arg); + if (is_range_predicate(args[1], args[0])) + return args[1]->dep_on_one_column(arg); + return true; +} + + void Item_func_like::print(String *str, enum_query_type query_type) { args[0]->print_parenthesised(str, query_type, precedence()); @@ -5696,8 +5752,18 @@ SEL_TREE *Item_func_like::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) bool sargable_pattern= with_sargable_pattern(); param->thd->mem_root= tmp_root; return sargable_pattern ? - Item_bool_func2::get_mm_tree(param, cond_ptr) : - Item_func::get_mm_tree(param, cond_ptr); + Item_bool_func2::get_mm_tree(param, cond_ptr) : + Item_func::get_mm_tree(param, cond_ptr); +} + + +bool Item_func_like::predicate_selectivity_checker(void *arg) +{ + if (with_sargable_pattern()) + { + return args[0]->dep_on_one_column(arg); + } + return true; } @@ -7139,6 +7205,49 @@ bool Item_equal::count_sargable_conds(void *arg) } +bool Item_equal::predicate_selectivity_checker(void *arg) +{ + /* + For equality conditions like tbl1.col = tbl2.col + we only want to know if the number of distinct values (ndv) is + available for all the fields in the multiple equality or not. + */ + Item_equal_fields_iterator it(*this); + while (it++) + { + Field *field= it.get_curr_field(); + if (!(field->is_ndv_available())) + return true; + } + + it.rewind(); + Item *item; + SAME_FIELD *same_field= (SAME_FIELD *) arg; + while ((item= it++)) + { + if (same_field->item) + return item->dep_on_one_column(arg); + item->dep_on_one_column(arg); + break; + } + return false; +} + + +bool Item_equal::is_statistics_available_for_range_predicates() +{ + bool found= false; + Item_equal_fields_iterator it(*this); + while (it++) + { + Field *field= it.get_curr_field(); + if (field->is_statistics_available_for_range_predicates()) + found= true; + } + return found; +} + + /** @brief Evaluate multiple equality diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index fa715badfc7..f467efd0e10 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -227,6 +227,7 @@ public: bool fix_length_and_dec() { decimals=0; max_length=1; return FALSE; } uint decimal_precision() const { return 1; } bool need_parentheses_in_default() { return true; } + bool predicate_selectivity_checker(void *arg) { return TRUE; } }; @@ -418,6 +419,7 @@ public: COND *remove_eq_conds(THD *thd, Item::cond_result *cond_value, bool top_level); bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); /* Specifies which result type the function uses to compare its arguments. This method is used in equal field propagation. @@ -936,6 +938,7 @@ public: bool find_not_null_fields(table_map allowed); void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge); bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); void add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level, table_map usable_tables, SARGABLE_PARAM **sargables); @@ -2473,6 +2476,7 @@ public: bool find_not_null_fields(table_map allowed); void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge); bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); Item *get_copy(THD *thd) { return get_item_copy<Item_func_in>(thd, this); } Item *build_clone(THD *thd) @@ -2570,6 +2574,7 @@ public: return FALSE; } bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); }; @@ -2813,6 +2818,7 @@ public: Item *get_copy(THD *thd) { return get_item_copy<Item_func_like>(thd, this); } + bool predicate_selectivity_checker(void *arg); }; @@ -3220,6 +3226,8 @@ public: uint elements_count() { return equal_items.elements; } friend class Item_equal_fields_iterator; bool count_sargable_conds(void *arg); + bool predicate_selectivity_checker(void *arg); + bool is_statistics_available_for_range_predicates(); Item *multiple_equality_transformer(THD *thd, uchar *arg); friend class Item_equal_iterator<List_iterator_fast,Item>; friend class Item_equal_iterator<List_iterator,Item>; @@ -3371,6 +3379,7 @@ public: SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr); Item *get_copy(THD *thd) { return get_item_copy<Item_cond_and>(thd, this); } + bool predicate_selectivity_checker(void *arg) { return FALSE; } }; inline bool is_cond_and(Item *item) @@ -3395,6 +3404,7 @@ public: Item *neg_transformer(THD *thd); Item *get_copy(THD *thd) { return get_item_copy<Item_cond_or>(thd, this); } + bool predicate_selectivity_checker(void *arg) { return FALSE; } }; class Item_func_dyncol_check :public Item_bool_func diff --git a/sql/item_func.h b/sql/item_func.h index 6a4a9fa5dae..d04cc8643cf 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -399,6 +399,12 @@ public: Item_func *get_item_func() { return this; } bool is_simplified_cond_processor(void *arg) { return const_item() && !val_int(); } + bool predicate_selectivity_checker(void *arg) + { + if (const_item() && !is_expensive()) + return false; + return true; + } }; diff --git a/sql/opt_range.cc b/sql/opt_range.cc index adc0572cb1c..d3f49097189 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3105,7 +3105,7 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, { Field *field= *field_ptr; if (bitmap_is_set(used_fields, field->field_index) && - is_eits_usable(field)) + field->is_eits_usable()) parts++; } @@ -3126,7 +3126,7 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, Field *field= *field_ptr; if (bitmap_is_set(used_fields, field->field_index)) { - if (!is_eits_usable(field)) + if (!field->is_eits_usable()) continue; uint16 store_length; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index bb9d82a7039..3553ee78c5b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8296,6 +8296,11 @@ choose_plan(JOIN *join, table_map join_tables) jtab_sort_func, (void*)join->emb_sjm_nest); Json_writer_object wrapper(thd); + + if (join->conds) + wrapper.add("cardinality_accurate", + join->conds->with_accurate_selectivity_estimation()); + Json_writer_array trace_plan(thd,"considered_execution_plans"); if (!join->emb_sjm_nest) @@ -29449,6 +29454,38 @@ void unpack_to_base_table_fields(TABLE *table) } + +/* + @brief + Checks if a predicate is a range predicate with a constant part + + @param + + @item the item referring to the field of the table + @value the item referring to the expression on the + rhs of a predicate + + @details + Range predicate is defined as the form of field op const + where op can be operators like </<=/=/>/>=/BETWEEN etc. + Also the statistics for the field should be available via + an index or statistical tables. + + @retval + TRUE : Success + FALSE : Otherwise +*/ + +bool is_range_predicate(Item *item, Item *value) +{ + Item *field= item->real_item(); + if (field->type() == Item::FIELD_ITEM && !field->const_item() && + (!value || !value->is_expensive())) + return true; + return false; +} + + /** @} (end of group Query_Optimizer) */ diff --git a/sql/sql_select.h b/sql/sql_select.h index 2aba63ddc79..4d5a6e0abf5 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -2531,5 +2531,6 @@ void propagate_new_equalities(THD *thd, Item *cond, List<Item_equal> *new_equalities, COND_EQUAL *inherited, bool *is_simplifiable_cond); +bool is_range_predicate(Item *item, Item *value); #endif /* SQL_SELECT_INCLUDED */ diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 2636299e330..71d472feaba 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -31,7 +31,6 @@ #include "opt_range.h" #include "uniques.h" #include "sql_show.h" -#include "sql_partition.h" /* The system variable 'use_stat_tables' can take one of the @@ -3159,7 +3158,10 @@ static void dump_stats_from_share_to_table(TABLE *table) Field **field_ptr= table_share->field; Field **table_field_ptr= table->field; for ( ; *field_ptr; field_ptr++, table_field_ptr++) + { (*table_field_ptr)->read_stats= (*field_ptr)->read_stats; + (*table_field_ptr)->stats_available= (*field_ptr)->stats_available; + } table->stats_is_read= true; } @@ -3978,37 +3980,3 @@ bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table) } return false; } - -/* - Check wheter we can use EITS statistics for a field or not - - TRUE : Use EITS for the columns - FALSE: Otherwise -*/ - -bool is_eits_usable(Field *field) -{ - Column_statistics* col_stats= field->read_stats; - - // check if column_statistics was allocated for this field - if (!col_stats) - return false; - - DBUG_ASSERT(field->table->stats_is_read); - - /* - (1): checks if we have EITS statistics for a particular column - (2): Don't use EITS for GEOMETRY columns - (3): Disabling reading EITS statistics for columns involved in the - partition list of a table. We assume the selecticivity for - such columns would be handled during partition pruning. - */ - - return !col_stats->no_stat_values_provided() && //(1) - field->type() != MYSQL_TYPE_GEOMETRY && //(2) -#ifdef WITH_PARTITION_STORAGE_ENGINE - (!field->table->part_info || - !field->table->part_info->field_in_partition_expr(field)) && //(3) -#endif - true; -} diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 20ecf06bfee..34471fd5270 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -138,7 +138,6 @@ double get_column_range_cardinality(Field *field, key_range *max_endp, uint range_flag); bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table); -bool is_eits_usable(Field* field); class Histogram { diff --git a/sql/table.cc b/sql/table.cc index c48a6fed89a..02f4ece9c40 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -5435,6 +5435,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) { (*f_ptr)->next_equal_field= NULL; (*f_ptr)->cond_selectivity= 1.0; + (*f_ptr)->stats_available= 0; } notnull_cond= 0; |