diff options
author | unknown <sanja@askmonty.org> | 2013-04-18 22:22:04 +0300 |
---|---|---|
committer | unknown <sanja@askmonty.org> | 2013-04-18 22:22:04 +0300 |
commit | 9441e536535af3e71aaa81801645ab42bd07e89f (patch) | |
tree | 4479a5d77e53380b7bd2d9b3d4db3c271ea6b40a /sql | |
parent | 8c71e7a38358c79e1b5b9072fa7407cec3cfd273 (diff) | |
download | mariadb-git-9441e536535af3e71aaa81801645ab42bd07e89f.tar.gz |
MDEV-4345
Sampling of selectivity of LIKE predicate.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/item.h | 15 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 33 | ||||
-rw-r--r-- | sql/item_cmpfunc.h | 8 | ||||
-rw-r--r-- | sql/opt_range.cc | 69 | ||||
-rw-r--r-- | sql/opt_range.h | 7 | ||||
-rw-r--r-- | sql/sql_class.h | 1 | ||||
-rw-r--r-- | sql/sql_select.cc | 72 | ||||
-rw-r--r-- | sql/sql_select.h | 13 | ||||
-rw-r--r-- | sql/sys_vars.cc | 15 | ||||
-rw-r--r-- | sql/table.cc | 1 | ||||
-rw-r--r-- | sql/table.h | 3 |
11 files changed, 232 insertions, 5 deletions
diff --git a/sql/item.h b/sql/item.h index 50d125f76de..d8ae051b4b5 100644 --- a/sql/item.h +++ b/sql/item.h @@ -548,6 +548,14 @@ typedef bool (Item::*Item_analyzer) (uchar **argp); typedef Item* (Item::*Item_transformer) (uchar *arg); typedef void (*Cond_traverser) (const Item *item, void *arg); +struct st_cond_statistic; + +struct find_selective_predicates_list_processor_data +{ + TABLE *table; + List<st_cond_statistic> list; +}; + class Item_equal; class COND_EQUAL; @@ -1108,6 +1116,11 @@ public: return (this->*processor)(arg); } + virtual bool walk_top_and(Item_processor processor, uchar *arg) + { + return (this->*processor)(arg); + } + virtual Item* transform(Item_transformer transformer, uchar *arg); /* @@ -1174,6 +1187,8 @@ public: return FALSE; } virtual bool exists2in_processor(uchar *opt_arg) { return 0; } + virtual bool find_selective_predicates_list_processor(uchar *opt_arg) + { return 0; } /* To call bool function for all arguments */ struct bool_func_call_args diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index b5b143a2448..7ca85a72cfa 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -4428,6 +4428,16 @@ bool Item_cond::walk(Item_processor processor, bool walk_subquery, uchar *arg) return Item_func::walk(processor, walk_subquery, arg); } +bool Item_cond_and::walk_top_and(Item_processor processor, uchar *arg) +{ + List_iterator_fast<Item> li(list); + Item *item; + while ((item= li++)) + if (item->walk_top_and(processor, arg)) + return 1; + return Item_cond::walk_top_and(processor, arg); +} + /** Transform an Item_cond object with a transformer callback function. @@ -4940,6 +4950,7 @@ bool Item_func_like::fix_fields(THD *thd, Item **ref) turboBM_compute_bad_character_shifts(); DBUG_PRINT("info",("done")); } + use_sampling= ((*first == wild_many || *first == wild_one) && len > 2); } } return FALSE; @@ -4951,6 +4962,28 @@ void Item_func_like::cleanup() Item_bool_func2::cleanup(); } + +bool Item_func_like::find_selective_predicates_list_processor(uchar *arg) +{ + find_selective_predicates_list_processor_data *data= + (find_selective_predicates_list_processor_data *) arg; + if (use_sampling && used_tables() == data->table->map) + { + COND_STATISTIC *stat= (COND_STATISTIC *)sql_alloc(sizeof(COND_STATISTIC)); + if (!stat) + return TRUE; + stat->cond= this; + Item *arg0= args[0]->real_item(); + if (args[1]->const_item() && arg0->type() == FIELD_ITEM) + stat->field_arg= ((Item_field *)arg0)->field; + else + stat->field_arg= NULL; + data->list.push_back(stat); + } + return FALSE; +} + + /** @brief Compile regular expression. diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 81598ba96c8..ab4219564fe 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -1487,8 +1487,9 @@ class Item_func_like :public Item_bool_func2 enum { alphabet_size = 256 }; Item *escape_item; - + bool escape_used_in_parsing; + bool use_sampling; public: int escape; @@ -1496,7 +1497,7 @@ public: Item_func_like(Item *a,Item *b, Item *escape_arg, bool escape_used) :Item_bool_func2(a,b), canDoTurboBM(FALSE), pattern(0), pattern_len(0), bmGs(0), bmBc(0), escape_item(escape_arg), - escape_used_in_parsing(escape_used) {} + escape_used_in_parsing(escape_used), use_sampling(0) {} longlong val_int(); enum Functype functype() const { return LIKE_FUNC; } optimize_type select_optimize() const; @@ -1504,6 +1505,8 @@ public: const char *func_name() const { return "like"; } bool fix_fields(THD *thd, Item **ref); void cleanup(); + + bool find_selective_predicates_list_processor(uchar *arg); }; @@ -1914,6 +1917,7 @@ public: Item *neg_transformer(THD *thd); void mark_as_condition_AND_part(TABLE_LIST *embedding); virtual uint exists2in_reserved_items() { return list.elements; }; + bool walk_top_and(Item_processor processor, uchar *arg); }; inline bool is_cond_and(Item *item) diff --git a/sql/opt_range.cc b/sql/opt_range.cc index c29a888ea6f..b736f898768 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3530,7 +3530,74 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond) } } } - } + } + + /* Calculate selectivity of probably highly selective predicates */ + ulong check_rows= + min(thd->variables.optimizer_selectivity_sampling_limit, + (ulong) (table_records * SELECTIVITY_SAMPLING_SHARE)); + if (cond && check_rows > SELECTIVITY_SAMPLING_THRESHOLD && + thd->variables.optimizer_use_condition_selectivity > 4) + { + find_selective_predicates_list_processor_data *dt= + (find_selective_predicates_list_processor_data *) + alloc_root(thd->mem_root, + sizeof(find_selective_predicates_list_processor_data)); + if (!dt) + DBUG_RETURN(TRUE); + dt->list.empty(); + dt->table= table; + if (cond->walk(&Item::find_selective_predicates_list_processor, 0, + (uchar*) dt)) + DBUG_RETURN(TRUE); + if (dt->list.elements > 0) + { + check_rows= check_selectivity(thd, check_rows, table, &dt->list); + if (check_rows > SELECTIVITY_SAMPLING_THRESHOLD) + { + COND_STATISTIC *stat; + List_iterator_fast<COND_STATISTIC> it(dt->list); + double examined_rows= check_rows; + while ((stat= it++)) + { + if (!stat->positive) + { + DBUG_PRINT("info", ("To avoid 0 assigned 1 to the counter")); + stat->positive= 1; // avoid 0 + } + DBUG_PRINT("info", ("The predicate selectivity : %g", + (double)stat->positive / examined_rows)); + double selectivity= ((double)stat->positive) / examined_rows; + table->cond_selectivity*= selectivity; + /* + If a field is involved then we register its selectivity in case + there in an equality with the field. + For example in case + t1.a LIKE "%bla%" and t1.a = t2.b + the selectivity we have found could be used also for t2. + */ + if (stat->field_arg) + { + stat->field_arg->cond_selectivity*= selectivity; + + if (stat->field_arg->next_equal_field) + { + for (Field *next_field= stat->field_arg->next_equal_field; + next_field != stat->field_arg; + next_field= next_field->next_equal_field) + { + next_field->cond_selectivity*= selectivity; + next_field->table->cond_selectivity*= selectivity; + } + } + } + } + + } + /* This list and its elements put to mem_root so should not be freed */ + table->cond_selectivity_sampling_explain= &dt->list; + } + } DBUG_RETURN(FALSE); } diff --git a/sql/opt_range.h b/sql/opt_range.h index d98bf1186e8..963551cabdb 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -1051,4 +1051,11 @@ void store_key_image_to_rec(Field *field, uchar *ptr, uint len); extern String null_string; +/* check this number of rows (default value) */ +#define SELECTIVITY_SAMPLING_LIMIT 5000 +/* but no more then this part of table (10%) */ +#define SELECTIVITY_SAMPLING_SHARE 0.10 +/* do not check if we are going check less then this number of records */ +#define SELECTIVITY_SAMPLING_THRESHOLD 10 + #endif diff --git a/sql/sql_class.h b/sql/sql_class.h index 8e2bd59da57..bb5b2c4e775 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -505,6 +505,7 @@ typedef struct system_variables ulong net_write_timeout; ulong optimizer_prune_level; ulong optimizer_search_depth; + ulong optimizer_selectivity_sampling_limit; ulong optimizer_use_condition_selectivity; ulong use_stat_tables; ulong histogram_size; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index cc4b139fb2f..a48eff66386 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -23972,6 +23972,78 @@ uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select, return MAX_KEY; } +/* + Count how much times conditions are true for several first rows of the table + + @param thd thread handle + @param rows_to_read how much rows to check + @param table table which should be checked + @conds conds list of conditions and countars for them + + @return number of really checked rows or 0 in case of error or empty table +*/ + +ulong check_selectivity(THD *thd, + ulong rows_to_read, + TABLE *table, + List<COND_STATISTIC> *conds) +{ + ulong count= 0; + COND_STATISTIC *cond; + List_iterator_fast<COND_STATISTIC> it(*conds); + handler *file= table->file; + uchar *record= table->record[0]; + int error= 0; + DBUG_ENTER("check_selectivity"); + + DBUG_ASSERT(rows_to_read > 0); + while ((cond= it++)) + { + DBUG_ASSERT(cond->cond); + DBUG_ASSERT(cond->cond->used_tables() == table->map); + cond->positive= 0; + } + it.rewind(); + + if (file->ha_rnd_init_with_error(1)) + DBUG_RETURN(0); + do + { + error= file->ha_rnd_next(record); + + if (thd->killed) + { + thd->send_kill_message(); + count= 0; + goto err; + } + if (error) + { + if (error == HA_ERR_RECORD_DELETED) + continue; + if (error == HA_ERR_END_OF_FILE) + break; + goto err; + } + + count++; + while ((cond= it++)) + { + if (cond->cond->val_bool()) + cond->positive++; + } + it.rewind(); + + } while (count < rows_to_read); + + file->ha_rnd_end(); + DBUG_RETURN(count); + +err: + DBUG_PRINT("error", ("error %d", error)); + file->ha_rnd_end(); + DBUG_RETURN(0); +} /** @} (end of group Query_Optimizer) diff --git a/sql/sql_select.h b/sql/sql_select.h index 9cda68bd434..950c48d6ea1 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1855,4 +1855,17 @@ void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps); double prev_record_reads(POSITION *positions, uint idx, table_map found_ref); void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist); +struct st_cond_statistic +{ + Item *cond; + Field *field_arg; + ulong positive; +}; +typedef struct st_cond_statistic COND_STATISTIC; + +ulong check_selectivity(THD *thd, + ulong rows_to_read, + TABLE *table, + List<COND_STATISTIC> *conds); + #endif /* SQL_SELECT_INCLUDED */ diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 4393809a6fe..24009cb5a99 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -56,6 +56,7 @@ #endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */ #include "threadpool.h" #include "sql_repl.h" +#include "opt_range.h" /* The rule for this file: everything should be 'static'. When a sys_var @@ -1533,6 +1534,14 @@ static Sys_var_ulong Sys_optimizer_prune_level( SESSION_VAR(optimizer_prune_level), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1), DEFAULT(1), BLOCK_SIZE(1)); +static Sys_var_ulong Sys_optimizer_selectivity_sampling_limit( + "optimizer_selectivity_sampling_limit", + "Controls number of record samples to check condition selectivity", + SESSION_VAR(optimizer_selectivity_sampling_limit), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(SELECTIVITY_SAMPLING_THRESHOLD, UINT_MAX), + DEFAULT(SELECTIVITY_SAMPLING_LIMIT), BLOCK_SIZE(1)); + static Sys_var_ulong Sys_optimizer_use_condition_selectivity( "optimizer_use_condition_selectivity", "Controls selectivity of which conditions the optimizer takes into " @@ -1548,9 +1557,11 @@ static Sys_var_ulong Sys_optimizer_use_condition_selectivity( "not backed by any index to calculate the cardinality of a partial join, " "4 - use histograms to calculate selectivity of range conditions that " "are not backed by any index to calculate the cardinality of " - "a partial join.", + "a partial join." + "5 - additionally use selectivity of certain non-range predicates " + "calculated on record samples", SESSION_VAR(optimizer_use_condition_selectivity), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(1, 4), DEFAULT(1), BLOCK_SIZE(1)); + VALID_RANGE(1, 5), DEFAULT(1), BLOCK_SIZE(1)); /** Warns about deprecated value 63 */ static bool fix_optimizer_search_depth(sys_var *self, THD *thd, diff --git a/sql/table.cc b/sql/table.cc index e88b3453ce9..caed8ed4107 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -3891,6 +3891,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) reginfo.impossible_range= 0; created= TRUE; cond_selectivity= 1.0; + cond_selectivity_sampling_explain= NULL; /* Catch wrong handling of the auto_increment_field_not_null. */ DBUG_ASSERT(!auto_increment_field_not_null); diff --git a/sql/table.h b/sql/table.h index e721d60f892..af79bffd437 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1038,6 +1038,8 @@ enum index_hint_type INDEX_HINT_FORCE }; +struct st_cond_statistic; + #define CHECK_ROW_FOR_NULLS_TO_REJECT (1 << 0) #define REJECT_ROW_DUE_TO_NULL_FIELDS (1 << 1) @@ -1163,6 +1165,7 @@ public: ha_rows quick_condition_rows; double cond_selectivity; + List<st_cond_statistic> *cond_selectivity_sampling_explain; table_map map; /* ID bit of table (1,2,4,8,16...) */ |