diff options
author | Igor Babaev <igor@askmonty.org> | 2013-04-17 10:18:04 -0700 |
---|---|---|
committer | Igor Babaev <igor@askmonty.org> | 2013-04-17 10:18:04 -0700 |
commit | a1cd28e2e54d32dc70280875bab7f0d35e9370c4 (patch) | |
tree | ceb810aa1bd2619efec70cb60d4adbac05e35a42 /sql | |
parent | 0e7410a154560486ce4e0c975f122a0b15853bb1 (diff) | |
parent | 43fe53acc225a2cea07188d142c1c0f8364cc43b (diff) | |
download | mariadb-git-a1cd28e2e54d32dc70280875bab7f0d35e9370c4.tar.gz |
Merge 10.0-base -> 10.0
Diffstat (limited to 'sql')
-rw-r--r-- | sql/field.cc | 136 | ||||
-rw-r--r-- | sql/field.h | 37 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 20 | ||||
-rw-r--r-- | sql/item_cmpfunc.h | 4 | ||||
-rw-r--r-- | sql/keycaches.cc | 66 | ||||
-rw-r--r-- | sql/keycaches.h | 12 | ||||
-rw-r--r-- | sql/log_event.cc | 11 | ||||
-rw-r--r-- | sql/mysqld.cc | 60 | ||||
-rw-r--r-- | sql/opt_range.cc | 322 | ||||
-rw-r--r-- | sql/opt_range.h | 2 | ||||
-rw-r--r-- | sql/opt_range_mrr.cc | 6 | ||||
-rw-r--r-- | sql/opt_subselect.cc | 2 | ||||
-rw-r--r-- | sql/opt_subselect.h | 1 | ||||
-rw-r--r-- | sql/rpl_filter.cc | 12 | ||||
-rw-r--r-- | sql/rpl_filter.h | 3 | ||||
-rw-r--r-- | sql/rpl_mi.cc | 68 | ||||
-rw-r--r-- | sql/rpl_mi.h | 4 | ||||
-rw-r--r-- | sql/rpl_rli.cc | 2 | ||||
-rw-r--r-- | sql/slave.cc | 6 | ||||
-rw-r--r-- | sql/sql_acl.cc | 5 | ||||
-rw-r--r-- | sql/sql_class.h | 8 | ||||
-rw-r--r-- | sql/sql_parse.cc | 11 | ||||
-rw-r--r-- | sql/sql_priv.h | 1 | ||||
-rw-r--r-- | sql/sql_select.cc | 381 | ||||
-rw-r--r-- | sql/sql_select.h | 8 | ||||
-rw-r--r-- | sql/sql_statistics.cc | 596 | ||||
-rw-r--r-- | sql/sql_statistics.h | 194 | ||||
-rw-r--r-- | sql/sys_vars.cc | 102 | ||||
-rw-r--r-- | sql/sys_vars.h | 6 | ||||
-rw-r--r-- | sql/table.cc | 14 | ||||
-rw-r--r-- | sql/table.h | 8 | ||||
-rw-r--r-- | sql/uniques.cc | 40 |
32 files changed, 1991 insertions, 157 deletions
diff --git a/sql/field.cc b/sql/field.cc index 460a3bd514b..e85903d76c6 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -1273,6 +1273,37 @@ out_of_range: return 1; } + +/** + @brief + Determine the relative position of the field value in a numeric interval + + @details + The function returns a double number between 0.0 and 1.0 as the relative + position of the value of the this field in the numeric interval of [min,max]. + If the value is not in the interval the the function returns 0.0 when + the value is less than min, and, 1.0 when the value is greater than max. + + @param min value of the left end of the interval + @param max value of the right end of the interval + + @return + relative position of the field value in the numeric interval [min,max] +*/ + +double Field_num::pos_in_interval(Field *min, Field *max) +{ + double n, d; + n= val_real() - min->val_real(); + if (n < 0) + return 0.0; + d= max->val_real() - min->val_real(); + if (d <= 0) + return 1.0; + return min(n/d, 1.0); +} + + /** Process decimal library return codes and issue warnings for overflow and truncation. @@ -1344,6 +1375,8 @@ Field::Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, comment.length=0; field_index= 0; is_stat_field= FALSE; + cond_selectivity= 1.0; + next_equal_field= NULL; } @@ -6165,6 +6198,79 @@ int Field_str::store(double nr) return store(buff, length, &my_charset_numeric); } +static +inline ulonglong char_prefix_to_ulonglong(uchar *src) +{ + uint sz= sizeof(ulonglong); + for (uint i= 0; i < sz/2; i++) + { + uchar tmp= src[i]; + src[i]= src[sz-1-i]; + src[sz-1-i]= tmp; + } + return uint8korr(src); +} + +/** + @brief + Determine the relative position of the field value in a string interval + + @details + The function returns a double number between 0.0 and 1.0 as the relative + position of the value of the this field in the string interval of [min,max]. + If the value is not in the interval the the function returns 0.0 when + the value is less than min, and, 1.0 when the value is greater than max. + + @note + To calculate the relative position of the string value v in the interval + [min, max] the function first converts the beginning of these three + strings v, min, max into the strings that are used for byte comparison. + For each string not more sizeof(ulonglong) first bytes are taken + from the result of conversion. Then these bytes are interpreted as the + big-endian representation of an ulonglong integer. The values of these + integer numbers obtained for the strings v, min, max are used to calculate + the position of v in [min,max] in the same way is it's done for numeric + fields (see Field_num::pos_in_interval). + + @todo + Improve the procedure for the case when min and max have the same + beginning + + @param min value of the left end of the interval + @param max value of the right end of the interval + + @return + relative position of the field value in the string interval [min,max] +*/ + +double Field_str::pos_in_interval(Field *min, Field *max) +{ + uchar mp_prefix[sizeof(ulonglong)]; + uchar minp_prefix[sizeof(ulonglong)]; + uchar maxp_prefix[sizeof(ulonglong)]; + ulonglong mp, minp, maxp; + my_strnxfrm(charset(), mp_prefix, sizeof(mp), + ptr + length_size(), + data_length()); + my_strnxfrm(charset(), minp_prefix, sizeof(minp), + min->ptr + length_size(), + min->data_length()); + my_strnxfrm(charset(), maxp_prefix, sizeof(maxp), + max->ptr + length_size(), + max->data_length()); + mp= char_prefix_to_ulonglong(mp_prefix); + minp= char_prefix_to_ulonglong(minp_prefix); + maxp= char_prefix_to_ulonglong(maxp_prefix); + double n, d; + n= mp - minp; + if (n < 0) + return 0.0; + d= maxp - minp; + if (d <= 0) + return 1.0; + return min(n/d, 1.0); +} + uint Field::is_equal(Create_field *new_field) { @@ -8376,6 +8482,36 @@ my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value) } +/** + @brief + Determine the relative position of the field value in a bit interval + + @details + The function returns a double number between 0.0 and 1.0 as the relative + position of the value of the this field in the bit interval of [min,max]. + If the value is not in the interval the the function returns 0.0 when + the value is less than min, and, 1.0 when the value is greater than max. + + @param min value of the left end of the interval + @param max value of the right end of the interval + + @return + relative position of the field value in the bit interval [min,max] +*/ + +double Field_bit::pos_in_interval(Field *min, Field *max) +{ + double n, d; + n= val_real() - min->val_real(); + if (n < 0) + return 0.0; + d= max->val_real() - min->val_real(); + if (d <= 0) + return 1.0; + return min(n/d, 1.0); +} + + /* Compare two bit fields using pointers within the record. SYNOPSIS diff --git a/sql/field.h b/sql/field.h index d03479bbd06..48d873beb32 100644 --- a/sql/field.h +++ b/sql/field.h @@ -220,7 +220,23 @@ public: */ bool is_created_from_null_item; - bool is_stat_field; /* TRUE in Field objects created for column min/max values */ + /* TRUE in Field objects created for column min/max values */ + bool is_stat_field; + + /* + Selectivity of the range condition over this field. + When calculating this selectivity a range predicate + is taken into account only if: + - it is extracted from the WHERE clause + - it depends only on the table the field belongs to + */ + double cond_selectivity; + + /* + The next field in the class of equal fields at the top AND level + of the WHERE clause + */ + Field *next_equal_field; /* This structure is used for statistical data on the column @@ -456,6 +472,10 @@ public: } return update_fl; } + virtual void store_field_value(uchar *val, uint len) + { + memcpy(ptr, val, len); + } virtual uint decimals() const { return 0; } /* Caller beware: sql_type can change str.Ptr, so check @@ -703,6 +723,12 @@ public: virtual bool hash_join_is_possible() { return TRUE; } virtual bool eq_cmp_as_binary() { return TRUE; } + /* Position of the field value within the interval of [min, max] */ + virtual double pos_in_interval(Field *min, Field *max) + { + return (double) 0.5; + } + friend int cre_myisam(char * name, register TABLE *form, uint options, ulonglong auto_increment_value); friend class Copy_field; @@ -821,6 +847,7 @@ public: bool get_int(CHARSET_INFO *cs, const char *from, uint len, longlong *rnd, ulonglong unsigned_max, longlong signed_min, longlong signed_max); + double pos_in_interval(Field *min, Field *max); }; @@ -866,6 +893,8 @@ public: virtual bool str_needs_quotes() { return TRUE; } uint is_equal(Create_field *new_field); bool eq_cmp_as_binary() { return test(flags & BINARY_FLAG); } + virtual uint length_size() { return 0; } + double pos_in_interval(Field *min, Field *max); }; /* base class for Field_string, Field_varstring and Field_blob */ @@ -1894,6 +1923,7 @@ public: uint new_null_bit); uint is_equal(Create_field *new_field); void hash(ulong *nr, ulong *nr2); + uint length_size() { return length_bytes; } private: int do_save_field_metadata(uchar *first_byte); }; @@ -2275,6 +2305,11 @@ public: } return update_fl; } + void store_field_value(uchar *val, uint len) + { + store(*((longlong *)val), TRUE); + } + double pos_in_interval(Field *min, Field *max); void get_image(uchar *buff, uint length, CHARSET_INFO *cs) { get_key_image(buff, length, itRAW); } void set_image(const uchar *buff,uint length, CHARSET_INFO *cs) diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 6f10e84a5f5..b5b143a2448 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -5568,7 +5568,8 @@ Item *Item_bool_rowready_func2::negated_item() */ Item_equal::Item_equal(Item *f1, Item *f2, bool with_const_item) - : Item_bool_func(), eval_item(0), cond_false(0), context_field(NULL) + : Item_bool_func(), eval_item(0), cond_false(0), context_field(NULL), + link_equal_fields(FALSE) { const_item_cache= 0; with_const= with_const_item; @@ -5592,7 +5593,8 @@ Item_equal::Item_equal(Item *f1, Item *f2, bool with_const_item) */ Item_equal::Item_equal(Item_equal *item_equal) - : Item_bool_func(), eval_item(0), cond_false(0), context_field(NULL) + : Item_bool_func(), eval_item(0), cond_false(0), context_field(NULL), + link_equal_fields(FALSE) { const_item_cache= 0; List_iterator_fast<Item> li(item_equal->equal_items); @@ -5918,6 +5920,9 @@ bool Item_equal::fix_fields(THD *thd, Item **ref) DBUG_ASSERT(fixed == 0); Item_equal_fields_iterator it(*this); Item *item; + Field *first_equal_field; + Field *last_equal_field; + Field *prev_equal_field= NULL; not_null_tables_cache= used_tables_cache= 0; const_item_cache= 0; while ((item= it++)) @@ -5931,7 +5936,18 @@ bool Item_equal::fix_fields(THD *thd, Item **ref) maybe_null= 1; if (!item->get_item_equal()) item->set_item_equal(this); + if (link_equal_fields && item->real_item()->type() == FIELD_ITEM) + { + last_equal_field= ((Item_field *) (item->real_item()))->field; + if (!prev_equal_field) + first_equal_field= last_equal_field; + else + prev_equal_field->next_equal_field= last_equal_field; + prev_equal_field= last_equal_field; + } } + if (prev_equal_field && last_equal_field != first_equal_field) + last_equal_field->next_equal_field= first_equal_field; fix_length_and_dec(); fixed= 1; return FALSE; diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 4ca31e01df9..81598ba96c8 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -1737,6 +1737,8 @@ class Item_equal: public Item_bool_func */ Item_field *context_field; + bool link_equal_fields; + public: COND_EQUAL *upper_levels; /* multiple equalities of upper and levels */ @@ -1774,6 +1776,8 @@ public: CHARSET_INFO *compare_collation(); void set_context_field(Item_field *ctx_field) { context_field= ctx_field; } + void set_link_equal_fields(bool flag) { link_equal_fields= flag; } + friend class Item_equal_fields_iterator; friend class Item_equal_iterator<List_iterator_fast,Item>; friend class Item_equal_iterator<List_iterator,Item>; friend Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels, diff --git a/sql/keycaches.cc b/sql/keycaches.cc index 84ed67d00f0..9e4b943dc83 100644 --- a/sql/keycaches.cc +++ b/sql/keycaches.cc @@ -20,6 +20,7 @@ ****************************************************************************/ NAMED_ILIST key_caches; +NAMED_ILIST rpl_filters; /** ilink (intrusive list element) with a name @@ -66,6 +67,23 @@ uchar* find_named(I_List<NAMED_ILINK> *list, const char *name, uint length, } +bool NAMED_ILIST::delete_element(const char *name, uint length, void (*free_element)(const char *name, uchar*)) +{ + I_List_iterator<NAMED_ILINK> it(*this); + NAMED_ILINK *element; + DBUG_ENTER("NAMED_ILIST::delete_element"); + while ((element= it++)) + { + if (element->cmp(name, length)) + { + (*free_element)(element->name, element->data); + delete element; + DBUG_RETURN(0); + } + } + DBUG_RETURN(1); +} + void NAMED_ILIST::delete_elements(void (*free_element)(const char *name, uchar*)) { NAMED_ILINK *element; @@ -159,3 +177,51 @@ bool process_key_caches(process_key_cache_t func, void *param) return res != 0; } +/* Rpl_filter functions */ + +LEX_STRING default_rpl_filter_base= {C_STRING_WITH_LEN("")}; + +Rpl_filter *get_rpl_filter(LEX_STRING *filter_name) +{ + if (!filter_name->length) + filter_name= &default_rpl_filter_base; + return ((Rpl_filter*) find_named(&rpl_filters, + filter_name->str, filter_name->length, 0)); +} + +Rpl_filter *create_rpl_filter(const char *name, uint length) +{ + Rpl_filter *filter; + DBUG_ENTER("create_rpl_filter"); + DBUG_PRINT("enter",("name: %.*s", length, name)); + + filter= new Rpl_filter; + if (filter) + { + if (!new NAMED_ILINK(&rpl_filters, name, length, (uchar*) filter)) + { + delete filter; + filter= 0; + } + } + DBUG_RETURN(filter); +} + + +Rpl_filter *get_or_create_rpl_filter(const char *name, uint length) +{ + LEX_STRING rpl_filter_name; + Rpl_filter *filter; + + rpl_filter_name.str= (char *) name; + rpl_filter_name.length= length; + if (!(filter= get_rpl_filter(&rpl_filter_name))) + filter= create_rpl_filter(name, length); + return filter; +} + +void free_rpl_filter(const char *name, Rpl_filter *filter) +{ + delete filter; +} + diff --git a/sql/keycaches.h b/sql/keycaches.h index 04d3f6145e7..2d52cb28973 100644 --- a/sql/keycaches.h +++ b/sql/keycaches.h @@ -18,6 +18,7 @@ #include "sql_list.h" #include <keycache.h> +#include <rpl_filter.h> extern "C" { @@ -30,8 +31,10 @@ class NAMED_ILIST: public I_List<NAMED_ILINK> { public: void delete_elements(void (*free_element)(const char*, uchar*)); + bool delete_element(const char *name, uint length, void (*free_element)(const char*, uchar*)); }; +/* For key cache */ extern LEX_STRING default_key_cache_base; extern KEY_CACHE zero_key_cache; extern NAMED_ILIST key_caches; @@ -42,4 +45,13 @@ KEY_CACHE *get_or_create_key_cache(const char *name, uint length); void free_key_cache(const char *name, KEY_CACHE *key_cache); bool process_key_caches(process_key_cache_t func, void *param); +/* For Rpl_filter */ +extern LEX_STRING default_rpl_filter_base; +extern NAMED_ILIST rpl_filters; + +Rpl_filter *create_rpl_filter(const char *name, uint length); +Rpl_filter *get_rpl_filter(LEX_STRING *filter_name); +Rpl_filter *get_or_create_rpl_filter(const char *name, uint length); +void free_rpl_filter(const char *name, Rpl_filter *filter); + #endif /* KEYCACHES_INCLUDED */ diff --git a/sql/log_event.cc b/sql/log_event.cc index 27620ea465a..90ec5f52178 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -3756,6 +3756,7 @@ int Query_log_event::do_apply_event(Relay_log_info const *rli, HA_CREATE_INFO db_options; uint64 sub_id= 0; rpl_gtid gtid; + Rpl_filter *rpl_filter= rli->mi->rpl_filter; DBUG_ENTER("Query_log_event::do_apply_event"); /* @@ -5439,6 +5440,7 @@ int Load_log_event::do_apply_event(NET* net, Relay_log_info const *rli, bool use_rli_only_for_errors) { LEX_STRING new_db; + Rpl_filter *rpl_filter= rli->mi->rpl_filter; DBUG_ENTER("Load_log_event::do_apply_event"); new_db.length= db_len; @@ -10040,8 +10042,8 @@ check_table_map(Relay_log_info const *rli, RPL_TABLE_LIST *table_list) enum_tbl_map_status res= OK_TO_PROCESS; if (rli->sql_thd->slave_thread /* filtering is for slave only */ && - (!rpl_filter->db_ok(table_list->db) || - (rpl_filter->is_on() && !rpl_filter->tables_ok("", table_list)))) + (!rli->mi->rpl_filter->db_ok(table_list->db) || + (rli->mi->rpl_filter->is_on() && !rli->mi->rpl_filter->tables_ok("", table_list)))) res= FILTERED_OUT; else { @@ -10075,6 +10077,7 @@ int Table_map_log_event::do_apply_event(Relay_log_info const *rli) char *db_mem, *tname_mem; size_t dummy_len; void *memory; + Rpl_filter *filter; DBUG_ENTER("Table_map_log_event::do_apply_event(Relay_log_info*)"); DBUG_ASSERT(rli->sql_thd == thd); @@ -10088,7 +10091,9 @@ int Table_map_log_event::do_apply_event(Relay_log_info const *rli) NullS))) DBUG_RETURN(HA_ERR_OUT_OF_MEM); - strmov(db_mem, rpl_filter->get_rewrite_db(m_dbnam, &dummy_len)); + /* call from mysql_client_binlog_statement() will not set rli->mi */ + filter= rli->sql_thd->slave_thread ? rli->mi->rpl_filter : global_rpl_filter; + strmov(db_mem, filter->get_rewrite_db(m_dbnam, &dummy_len)); strmov(tname_mem, m_tblnam); table_list->init_one_table(db_mem, strlen(db_mem), diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 80b6428fca2..3434228b540 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -625,7 +625,8 @@ MYSQL_FILE *bootstrap_file; int bootstrap_error; I_List<THD> threads; -Rpl_filter* rpl_filter; +Rpl_filter* cur_rpl_filter; +Rpl_filter* global_rpl_filter; Rpl_filter* binlog_filter; THD *first_global_thread() @@ -1977,7 +1978,7 @@ void clean_up(bool print_message) #endif my_uuid_end(); delete binlog_filter; - delete rpl_filter; + delete global_rpl_filter; end_ssl(); vio_end(); my_regex_end(); @@ -3743,9 +3744,9 @@ static int init_common_variables() max_system_variables.pseudo_thread_id= (ulong)~0; server_start_time= flush_status_time= my_time(0); - rpl_filter= new Rpl_filter; + global_rpl_filter= new Rpl_filter; binlog_filter= new Rpl_filter; - if (!rpl_filter || !binlog_filter) + if (!global_rpl_filter || !binlog_filter) { sql_perror("Could not allocate replication and binlog filters"); return 1; @@ -5298,6 +5299,9 @@ int mysqld_main(int argc, char **argv) create_shutdown_thread(); start_handle_manager(); + /* Copy default global rpl_filter to global_rpl_filter */ + copy_filter_setting(global_rpl_filter, get_or_create_rpl_filter("", 0)); + /* init_slave() must be called after the thread keys are created. Some parts of the code (e.g. SHOW STATUS LIKE 'slave_running' and other @@ -6724,28 +6728,28 @@ struct my_option my_long_options[]= "while having selected a different or no database. If you need cross " "database updates to work, make sure you have 3.23.28 or later, and use " "replicate-wild-do-table=db_name.%.", - 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"replicate-do-table", OPT_REPLICATE_DO_TABLE, "Tells the slave thread to restrict replication to the specified table. " "To specify more than one table, use the directive multiple times, once " "for each table. This will work for cross-database updates, in contrast " - "to replicate-do-db.", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + "to replicate-do-db.", 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"replicate-ignore-db", OPT_REPLICATE_IGNORE_DB, "Tells the slave thread to not replicate to the specified database. To " "specify more than one database to ignore, use the directive multiple " "times, once for each database. This option will not work if you use " "cross database updates. If you need cross database updates to work, " "make sure you have 3.23.28 or later, and use replicate-wild-ignore-" - "table=db_name.%. ", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + "table=db_name.%. ", 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"replicate-ignore-table", OPT_REPLICATE_IGNORE_TABLE, "Tells the slave thread to not replicate to the specified table. To specify " "more than one table to ignore, use the directive multiple times, once for " "each table. This will work for cross-database updates, in contrast to " - "replicate-ignore-db.", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + "replicate-ignore-db.", 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"replicate-rewrite-db", OPT_REPLICATE_REWRITE_DB, "Updates to a database with a different name than the original. Example: " "replicate-rewrite-db=master_db_name->slave_db_name.", - 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, #ifdef HAVE_REPLICATION {"replicate-same-server-id", 0, "In replication, if set to 1, do not skip events having our server id. " @@ -6761,7 +6765,7 @@ struct my_option my_long_options[]= "database updates. Example: replicate-wild-do-table=foo%.bar% will " "replicate only updates to tables in all databases that start with foo " "and whose table names start with bar.", - 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"replicate-wild-ignore-table", OPT_REPLICATE_WILD_IGNORE_TABLE, "Tells the slave thread to not replicate to the tables that match the " "given wildcard pattern. To specify more than one table to ignore, use " @@ -6769,7 +6773,7 @@ struct my_option my_long_options[]= "cross-database updates. Example: replicate-wild-ignore-table=foo%.bar% " "will not do updates to tables in databases that start with foo and whose " "table names start with bar.", - 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + 0, 0, 0, GET_STR | GET_ASK_ADDR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"safe-mode", OPT_SAFE, "Skip some optimize stages (for testing). Deprecated.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"safe-user-create", 0, @@ -8063,12 +8067,12 @@ mysqld_get_one_option(int optid, #ifdef HAVE_REPLICATION case (int)OPT_REPLICATE_IGNORE_DB: { - rpl_filter->add_ignore_db(argument); + cur_rpl_filter->add_ignore_db(argument); break; } case (int)OPT_REPLICATE_DO_DB: { - rpl_filter->add_do_db(argument); + cur_rpl_filter->add_do_db(argument); break; } case (int)OPT_REPLICATE_REWRITE_DB: @@ -8099,7 +8103,7 @@ mysqld_get_one_option(int optid, return 1; } - rpl_filter->add_db_rewrite(key, val); + cur_rpl_filter->add_db_rewrite(key, val); break; } @@ -8115,7 +8119,7 @@ mysqld_get_one_option(int optid, } case (int)OPT_REPLICATE_DO_TABLE: { - if (rpl_filter->add_do_table(argument)) + if (cur_rpl_filter->add_do_table(argument)) { sql_print_error("Could not add do table rule '%s'!\n", argument); return 1; @@ -8124,7 +8128,7 @@ mysqld_get_one_option(int optid, } case (int)OPT_REPLICATE_WILD_DO_TABLE: { - if (rpl_filter->add_wild_do_table(argument)) + if (cur_rpl_filter->add_wild_do_table(argument)) { sql_print_error("Could not add do table rule '%s'!\n", argument); return 1; @@ -8133,7 +8137,7 @@ mysqld_get_one_option(int optid, } case (int)OPT_REPLICATE_WILD_IGNORE_TABLE: { - if (rpl_filter->add_wild_ignore_table(argument)) + if (cur_rpl_filter->add_wild_ignore_table(argument)) { sql_print_error("Could not add ignore table rule '%s'!\n", argument); return 1; @@ -8142,7 +8146,7 @@ mysqld_get_one_option(int optid, } case (int)OPT_REPLICATE_IGNORE_TABLE: { - if (rpl_filter->add_ignore_table(argument)) + if (cur_rpl_filter->add_ignore_table(argument)) { sql_print_error("Could not add ignore table rule '%s'!\n", argument); return 1; @@ -8335,7 +8339,7 @@ mysqld_get_one_option(int optid, C_MODE_START static void* -mysql_getopt_value(const char *keyname, uint key_length, +mysql_getopt_value(const char *name, uint length, const struct my_option *option, int *error) { if (error) @@ -8348,7 +8352,7 @@ mysql_getopt_value(const char *keyname, uint key_length, case OPT_KEY_CACHE_PARTITIONS: { KEY_CACHE *key_cache; - if (!(key_cache= get_or_create_key_cache(keyname, key_length))) + if (!(key_cache= get_or_create_key_cache(name, length))) { if (error) *error= EXIT_OUT_OF_MEMORY; @@ -8367,6 +8371,22 @@ mysql_getopt_value(const char *keyname, uint key_length, return (uchar**) &key_cache->param_partitions; } } + case OPT_REPLICATE_DO_DB: + case OPT_REPLICATE_DO_TABLE: + case OPT_REPLICATE_IGNORE_DB: + case OPT_REPLICATE_IGNORE_TABLE: + case OPT_REPLICATE_WILD_DO_TABLE: + case OPT_REPLICATE_WILD_IGNORE_TABLE: + case OPT_REPLICATE_REWRITE_DB: + { + /* Store current filter for mysqld_get_one_option() */ + if (!(cur_rpl_filter= get_or_create_rpl_filter(name, length))) + { + if (error) + *error= EXIT_OUT_OF_MEMORY; + } + return 0; + } } return option->value; } diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 96354f05110..0087e73072c 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -117,6 +117,7 @@ #include "records.h" // init_read_record, end_read_record #include <m_ctype.h> #include "sql_select.h" +#include "sql_statistics.h" #include "filesort.h" // filesort_free_buffers #ifndef EXTRA_DEBUG @@ -3218,6 +3219,327 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, } /**************************************************************************** + * Condition selectivity module + ****************************************************************************/ + + +/* + Build descriptors of pseudo-indexes over columns to perform range analysis + + SYNOPSIS + create_key_parts_for_pseudo_indexes() + param IN/OUT data structure for the descriptors to be built + used_fields bitmap of columns for which the descriptors are to be built + + DESCRIPTION + For each column marked in the bitmap used_fields the function builds + a descriptor of a single-component pseudo-index over this column that + can be used for the range analysis of the predicates over this columns. + The descriptors are created in the memory of param->mem_root. + + RETURN + FALSE in the case of success + TRUE otherwise +*/ + +static +bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, + MY_BITMAP *used_fields) +{ + Field **field_ptr; + TABLE *table= param->table; + uint parts= 0; + + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + if (bitmap_is_set(used_fields, (*field_ptr)->field_index)) + parts++; + } + + KEY_PART *key_part; + uint keys= 0; + + if (!(key_part= (KEY_PART *) alloc_root(param->mem_root, + sizeof(KEY_PART) * parts))) + return TRUE; + + param->key_parts= key_part; + + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + if (bitmap_is_set(used_fields, (*field_ptr)->field_index)) + { + Field *field= *field_ptr; + uint16 store_length; + key_part->key= keys; + key_part->part= 0; + key_part->length= (uint16) field->key_length(); + store_length= key_part->length; + if (field->real_maybe_null()) + store_length+= HA_KEY_NULL_LENGTH; + if (field->real_type() == MYSQL_TYPE_VARCHAR) + store_length+= HA_KEY_BLOB_LENGTH; + key_part->store_length= store_length; + key_part->field= field; + key_part->image_type= Field::itRAW; + key_part->flag= 0; + param->key[keys]= key_part; + keys++; + key_part++; + } + } + param->keys= keys; + param->key_parts_end= key_part; + + return FALSE; +} + + +/* + Estimate the number of rows in all ranges built for a column + by the range optimizer + + SYNOPSIS + records_in_column_ranges() + param the data structure to access descriptors of pseudo indexes + built over columns used in the condition of the processed query + idx the index of the descriptor of interest in param + tree the tree representing ranges built for the interesting column + + DESCRIPTION + This function retrieves the ranges represented by the SEL_ARG 'tree' and + for each of them r it calls the function get_column_range_cardinality() + that estimates the number of expected rows in r. It is assumed that param + is the data structure containing the descriptors of pseudo-indexes that + has been built to perform range analysis of the range conditions imposed + on the columns used in the processed query, while idx is the index of the + descriptor created in 'param' exactly for the column for which 'tree' + has been built by the range optimizer. + + RETURN + the number of rows in the retrieved ranges +*/ + +static +double records_in_column_ranges(PARAM *param, uint idx, + SEL_ARG *tree) +{ + SEL_ARG_RANGE_SEQ seq; + KEY_MULTI_RANGE range; + range_seq_t seq_it; + double rows; + Field *field; + uint flags= 0; + double total_rows= 0; + RANGE_SEQ_IF seq_if = {NULL, sel_arg_range_seq_init, + sel_arg_range_seq_next, 0, 0}; + + /* Handle cases when we don't have a valid non-empty list of range */ + if (!tree) + return HA_POS_ERROR; + if (tree->type == SEL_ARG::IMPOSSIBLE) + return (0L); + + field= tree->field; + + seq.keyno= idx; + seq.real_keyno= MAX_KEY; + seq.param= param; + seq.start= tree; + + seq_it= seq_if.init((void *) &seq, 0, flags); + + while (!seq_if.next(seq_it, &range)) + { + key_range *min_endp, *max_endp; + min_endp= range.start_key.length? &range.start_key : NULL; + max_endp= range.end_key.length? &range.end_key : NULL; + rows= get_column_range_cardinality(field, min_endp, max_endp, + range.range_flag); + if (HA_POS_ERROR == rows) + { + total_rows= HA_POS_ERROR; + break; + } + total_rows += rows; + } + return total_rows; +} + + +/* + Calculate the selectivity of the condition imposed on the rows of a table + + SYNOPSIS + calculate_cond_selectivity_for_table() + thd the context handle + table the table of interest + cond conditions imposed on the rows of the table + + DESCRIPTION + This function calculates the selectivity of range conditions cond imposed + on the rows of 'table' in the processed query. + The calculated selectivity is assigned to the field table->cond_selectivity. + + NOTE + Currently the selectivities of range conditions over different columns are + considered independent. + + RETURN + FALSE on success + TRUE otherwise +*/ + +bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond) +{ + uint keynr; + uint max_quick_key_parts= 0; + MY_BITMAP *used_fields= &table->cond_set; + double table_records= table->stat_records(); + DBUG_ENTER("calculate_cond_selectivity_for_table"); + + table->cond_selectivity= 1.0; + + if (table_records == 0) + DBUG_RETURN(FALSE); + + if (thd->variables.optimizer_use_condition_selectivity > 2 && + !bitmap_is_clear_all(used_fields)) + { + /* + Calculate the selectivity of the range conditions not supported + by any index + */ + + PARAM param; + MEM_ROOT alloc; + SEL_TREE *tree; + SEL_ARG **key, **end; + double rows; + uint idx= 0; + + init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0, + MYF(MY_THREAD_SPECIFIC)); + param.thd= thd; + param.mem_root= &alloc; + param.old_root= thd->mem_root; + param.table= table; + param.is_ror_scan= FALSE; + + if (create_key_parts_for_pseudo_indexes(¶m, used_fields)) + goto free_alloc; + + param.prev_tables= param.read_tables= 0; + param.current_table= table->map; + param.using_real_indexes= FALSE; + param.real_keynr[0]= 0; + param.alloced_sel_args= 0; + + thd->no_errors=1; + + tree= get_mm_tree(¶m, cond); + + if (!tree) + goto free_alloc; + + table->reginfo.impossible_range= 0; + if (tree->type == SEL_TREE::IMPOSSIBLE) + { + rows= 0; + table->reginfo.impossible_range= 1; + goto free_alloc; + } + else if (tree->type == SEL_TREE::MAYBE) + { + rows= table_records; + goto free_alloc; + } + + for (key= tree->keys, end= key + param.keys; key != end; key++, idx++) + { + if (*key) + { + if ((*key)->type == SEL_ARG::IMPOSSIBLE) + { + rows= 0; + table->reginfo.impossible_range= 1; + goto free_alloc; + } + else + { + rows= records_in_column_ranges(¶m, idx, *key); + if (rows != HA_POS_ERROR) + (*key)->field->cond_selectivity= rows/table_records; + } + } + } + + for (Field **field_ptr= table->field; *field_ptr; field_ptr++) + { + Field *table_field= *field_ptr; + if (bitmap_is_set(table->read_set, table_field->field_index) && + table_field->cond_selectivity < 1.0) + table->cond_selectivity*= table_field->cond_selectivity; + } + + free_alloc: + thd->mem_root= param.old_root; + free_root(&alloc, MYF(0)); + + } + + /* Calculate the selectivity of the range conditions supported by indexes */ + + bitmap_clear_all(used_fields); + + for (keynr= 0; keynr < table->s->keys; keynr++) + { + if (table->quick_keys.is_set(keynr)) + set_if_bigger(max_quick_key_parts, table->quick_key_parts[keynr]); + } + + for (uint quick_key_parts= max_quick_key_parts; + quick_key_parts; quick_key_parts--) + { + for (keynr= 0; keynr < table->s->keys; keynr++) + { + if (table->quick_keys.is_set(keynr) && + table->quick_key_parts[keynr] == quick_key_parts) + { + uint i; + uint used_key_parts= table->quick_key_parts[keynr]; + double quick_cond_selectivity= table->quick_rows[keynr] / + table_records; + KEY *key_info= table->key_info + keynr; + KEY_PART_INFO* key_part= key_info->key_part; + for (i= 0; i < used_key_parts; i++, key_part++) + { + if (bitmap_is_set(used_fields, key_part->fieldnr-1)) + break; + bitmap_set_bit(used_fields, key_part->fieldnr-1); + } + if (i) + { + table->cond_selectivity*= quick_cond_selectivity; + if (i != used_key_parts) + { + double f1= key_info->actual_rec_per_key(i-1); + double f2= key_info->actual_rec_per_key(i); + table->cond_selectivity*= f1 / f2; + } + } + } + } + } + + DBUG_RETURN(FALSE); +} + +/**************************************************************************** + * Condition selectivity code ends + ****************************************************************************/ + +/**************************************************************************** * Partition pruning module ****************************************************************************/ #ifdef WITH_PARTITION_STORAGE_ENGINE diff --git a/sql/opt_range.h b/sql/opt_range.h index fff6e414ad9..d98bf1186e8 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -1042,6 +1042,8 @@ SQL_SELECT *make_select(TABLE *head, table_map const_tables, table_map read_tables, COND *conds, bool allow_null_cond, int *error); +bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item *cond); + #ifdef WITH_PARTITION_STORAGE_ENGINE bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond); void store_key_image_to_rec(Field *field, uchar *ptr, uint len); diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc index 1f4e36178db..8029dbf000f 100644 --- a/sql/opt_range_mrr.cc +++ b/sql/opt_range_mrr.cc @@ -268,8 +268,10 @@ walk_up_n_right: range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts); if (!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag && - (uint)key_tree->part+1 == seq->param->table->key_info[seq->real_keyno].key_parts && - (seq->param->table->key_info[seq->real_keyno].flags & HA_NOSAME) && + (seq->real_keyno == MAX_KEY || + ((uint)key_tree->part+1 == + seq->param->table->key_info[seq->real_keyno].key_parts && + (seq->param->table->key_info[seq->real_keyno].flags & HA_NOSAME))) && range->start_key.length == range->end_key.length && !memcmp(seq->param->min_key,seq->param->max_key,range->start_key.length)) range->range_flag= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE); diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 49ad8462fb4..34e705d3218 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -3908,7 +3908,7 @@ SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd) &tmpname, (uint) strlen(path)+1, &group_buff, (!using_unique_constraint ? uniq_tuple_length_arg : 0), - &bitmaps, bitmap_buffer_size(1)*3, + &bitmaps, bitmap_buffer_size(1)*5, NullS)) { if (temp_pool_slot != MY_BIT_NONE) diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index 007ce2e6d15..01da437504b 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -283,6 +283,7 @@ public: { pos->records_read= best_loose_scan_records; pos->key= best_loose_scan_start_key; + pos->cond_selectivity= 1.0; pos->loosescan_picker.loosescan_key= best_loose_scan_key; pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1; pos->use_join_buffer= FALSE; diff --git a/sql/rpl_filter.cc b/sql/rpl_filter.cc index f2bd036896d..2e7a2242d45 100644 --- a/sql/rpl_filter.cc +++ b/sql/rpl_filter.cc @@ -734,6 +734,18 @@ Rpl_filter::get_rewrite_db(const char* db, size_t *new_len) } +void +Rpl_filter::copy_rewrite_db(Rpl_filter *from) +{ + I_List_iterator<i_string_pair> it(from->rewrite_db); + i_string_pair* tmp; + DBUG_ASSERT(rewrite_db.is_empty()); + + /* TODO: Add memory checking here and in all add_xxxx functions ! */ + while ((tmp=it++)) + add_db_rewrite(tmp->key, tmp->val); +} + I_List<i_string>* Rpl_filter::get_do_db() { diff --git a/sql/rpl_filter.h b/sql/rpl_filter.h index 2eb0340b714..65d11cfb6e6 100644 --- a/sql/rpl_filter.h +++ b/sql/rpl_filter.h @@ -88,6 +88,7 @@ public: bool rewrite_db_is_empty(); const char* get_rewrite_db(const char* db, size_t *new_len); + void copy_rewrite_db(Rpl_filter *from); I_List<i_string>* get_do_db(); I_List<i_string>* get_ignore_db(); @@ -139,7 +140,7 @@ private: I_List<i_string_pair> rewrite_db; }; -extern Rpl_filter *rpl_filter; +extern Rpl_filter *global_rpl_filter; extern Rpl_filter *binlog_filter; #endif // RPL_FILTER_H diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc index 25fadf20cce..32d3623cb70 100644 --- a/sql/rpl_mi.cc +++ b/sql/rpl_mi.cc @@ -61,6 +61,13 @@ Master_info::Master_info(LEX_STRING *connection_name_arg, connection_name.length+1); my_casedn_str(system_charset_info, cmp_connection_name.str); } + /* When MySQL restarted, all Rpl_filter settings which aren't in the my.cnf + * will lose. So if you want a setting will not lose after restarting, you + * should add them into my.cnf + * */ + rpl_filter= get_or_create_rpl_filter(connection_name.str, + connection_name.length); + copy_filter_setting(rpl_filter, global_rpl_filter); my_init_dynamic_array(&ignore_server_ids, sizeof(global_system_variables.server_id), 16, 16, @@ -79,6 +86,8 @@ Master_info::Master_info(LEX_STRING *connection_name_arg, Master_info::~Master_info() { + rpl_filters.delete_element(connection_name.str, connection_name.length, + (void (*)(const char*, uchar*)) free_rpl_filter); my_free(connection_name.str); delete_dynamic(&ignore_server_ids); mysql_mutex_destroy(&run_lock); @@ -747,6 +756,65 @@ void create_logfile_name_with_suffix(char *res_file_name, size_t length, } } +void copy_filter_setting(Rpl_filter* dst_filter, Rpl_filter* src_filter) +{ + char buf[256]; + String tmp(buf, sizeof(buf), &my_charset_bin); + + dst_filter->get_do_db(&tmp); + if (tmp.is_empty()) + { + src_filter->get_do_db(&tmp); + if (!tmp.is_empty()) + dst_filter->set_do_db(tmp.ptr()); + } + + dst_filter->get_do_table(&tmp); + if (tmp.is_empty()) + { + src_filter->get_do_table(&tmp); + if (!tmp.is_empty()) + dst_filter->set_do_table(tmp.ptr()); + } + + dst_filter->get_ignore_db(&tmp); + if (tmp.is_empty()) + { + src_filter->get_ignore_db(&tmp); + if (!tmp.is_empty()) + dst_filter->set_ignore_db(tmp.ptr()); + } + + dst_filter->get_ignore_table(&tmp); + if (tmp.is_empty()) + { + src_filter->get_ignore_table(&tmp); + if (!tmp.is_empty()) + dst_filter->set_ignore_table(tmp.ptr()); + } + + dst_filter->get_wild_do_table(&tmp); + if (tmp.is_empty()) + { + src_filter->get_wild_do_table(&tmp); + if (!tmp.is_empty()) + dst_filter->set_wild_do_table(tmp.ptr()); + } + + dst_filter->get_wild_ignore_table(&tmp); + if (tmp.is_empty()) + { + src_filter->get_wild_ignore_table(&tmp); + if (!tmp.is_empty()) + dst_filter->set_wild_ignore_table(tmp.ptr()); + } + + if (dst_filter->rewrite_db_is_empty()) + { + if (!src_filter->rewrite_db_is_empty()) + dst_filter->copy_rewrite_db(src_filter); + } +} Master_info_index::Master_info_index() { diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h index 318306bbbb5..9958f9a5cea 100644 --- a/sql/rpl_mi.h +++ b/sql/rpl_mi.h @@ -21,6 +21,8 @@ #include "rpl_rli.h" #include "rpl_reporting.h" #include "my_sys.h" +#include "rpl_filter.h" +#include "keycaches.h" typedef struct st_mysql MYSQL; @@ -93,6 +95,7 @@ class Master_info : public Slave_reporting_capability uint32 file_id; /* for 3.23 load data infile */ Relay_log_info rli; uint port; + Rpl_filter* rpl_filter; /* Each replication can set its filter rule*/ /* to hold checksum alg in use until IO thread has received FD. Initialized to novalue, then set to the queried from master @@ -142,6 +145,7 @@ int flush_master_info(Master_info* mi, bool flush_relay_log_cache, bool need_lock_relay_log); int change_master_server_id_cmp(ulong *id1, ulong *id2); +void copy_filter_setting(Rpl_filter* dst_filter, Rpl_filter* src_filter); /* Multi master are handled trough this struct. diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index cb1d7fb22de..740d09e3cf9 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -62,7 +62,7 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery) gtid_sub_id(0), tables_to_lock(0), tables_to_lock_count(0), last_event_start_time(0), deferred_events(NULL),m_flags(0), row_stmt_start_timestamp(0), long_find_row_note_printed(false), - m_annotate_event(0) + m_annotate_event(0), mi(0) { DBUG_ENTER("Relay_log_info::Relay_log_info"); diff --git a/sql/slave.cc b/sql/slave.cc index d0723c331df..77204eb2d59 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -2321,6 +2321,7 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full, DBUG_PRINT("info",("host is set: '%s'", mi->host)); String *packet= &thd->packet; Protocol *protocol= thd->protocol; + Rpl_filter *rpl_filter= mi->rpl_filter; char buf[256]; String tmp(buf, sizeof(buf), &my_charset_bin); @@ -3809,6 +3810,7 @@ pthread_handler_t handle_slave_sql(void *arg) thd = new THD; // note that contructor of THD uses DBUG_ ! thd->thread_stack = (char*)&thd; // remember where our stack is + thd->rpl_filter = mi->rpl_filter; DBUG_ASSERT(rli->inited); DBUG_ASSERT(rli->mi == mi); @@ -3839,7 +3841,7 @@ pthread_handler_t handle_slave_sql(void *arg) } thd->init_for_queries(); thd->rli_slave= rli; - if ((rli->deferred_events_collecting= rpl_filter->is_on())) + if ((rli->deferred_events_collecting= mi->rpl_filter->is_on())) { rli->deferred_events= new Deferred_log_events(rli); } @@ -4167,7 +4169,7 @@ static int process_io_create_file(Master_info* mi, Create_file_log_event* cev) if (unlikely(!cev->is_valid())) DBUG_RETURN(1); - if (!rpl_filter->db_ok(cev->db)) + if (!mi->rpl_filter->db_ok(cev->db)) { skip_load_data_infile(net); DBUG_RETURN(0); diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index db734389b7f..c3cf866ca93 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -1906,6 +1906,7 @@ bool change_password(THD *thd, const char *host, const char *user, { TABLE_LIST tables; TABLE *table; + Rpl_filter *rpl_filter= thd->rpl_filter; /* Buffer should be extended when password length is extended. */ char buff[512]; ulong query_length; @@ -3594,6 +3595,7 @@ int mysql_table_grant(THD *thd, TABLE_LIST *table_list, TABLE_LIST tables[3]; bool create_new_users=0; char *db_name, *table_name; + Rpl_filter *rpl_filter= thd->rpl_filter; DBUG_ENTER("mysql_table_grant"); if (!initialized) @@ -3870,6 +3872,7 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc, TABLE_LIST tables[2]; bool create_new_users=0, result=0; char *db_name, *table_name; + Rpl_filter *rpl_filter= thd->rpl_filter; DBUG_ENTER("mysql_routine_grant"); if (!initialized) @@ -4009,6 +4012,7 @@ bool mysql_grant(THD *thd, const char *db, List <LEX_USER> &list, char tmp_db[SAFE_NAME_LEN+1]; bool create_new_users=0; TABLE_LIST tables[2]; + Rpl_filter *rpl_filter= thd->rpl_filter; DBUG_ENTER("mysql_grant"); if (!initialized) @@ -5762,6 +5766,7 @@ void get_mqh(const char *user, const char *host, USER_CONN *uc) #define GRANT_TABLES 6 int open_grant_tables(THD *thd, TABLE_LIST *tables) { + Rpl_filter *rpl_filter= thd->rpl_filter; DBUG_ENTER("open_grant_tables"); if (!initialized) diff --git a/sql/sql_class.h b/sql/sql_class.h index 99c6f381692..fc931820dcd 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -59,6 +59,7 @@ void set_thd_stage_info(void *thd, class Reprepare_observer; class Relay_log_info; +class Rpl_filter; class Query_log_event; class Load_log_event; @@ -516,7 +517,10 @@ typedef struct system_variables ulong net_write_timeout; ulong optimizer_prune_level; ulong optimizer_search_depth; + ulong optimizer_use_condition_selectivity; ulong use_stat_tables; + ulong histogram_size; + ulong histogram_type; ulong preload_buff_size; ulong profiling_history_size; ulong read_buff_size; @@ -1602,6 +1606,9 @@ public: /* Slave applier execution context */ Relay_log_info* rli_slave; + /* Used to SLAVE SQL thread */ + Rpl_filter* rpl_filter; + void reset_for_next_command(bool calculate_userstat); /* Constant for THD::where initialization in the beginning of every query. @@ -4140,6 +4147,7 @@ class Unique :public Sql_alloc uint size; uint full_size; uint min_dupl_count; /* always 0 for unions, > 0 for intersections */ + bool with_counters; bool merge(TABLE *table, uchar *buff, bool without_last_merge); diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 589c1a3b7b9..0b2daade40e 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -171,8 +171,8 @@ const char *xa_state_names[]={ */ inline bool all_tables_not_ok(THD *thd, TABLE_LIST *tables) { - return rpl_filter->is_on() && tables && !thd->spcont && - !rpl_filter->tables_ok(thd->db, tables); + return thd->rpl_filter->is_on() && tables && !thd->spcont && + !thd->rpl_filter->tables_ok(thd->db, tables); } #endif @@ -2155,6 +2155,8 @@ mysql_execute_command(THD *thd) #ifdef HAVE_REPLICATION /* have table map for update for multi-update statement (BUG#37051) */ bool have_table_map_for_update= FALSE; + /* */ + Rpl_filter *rpl_filter= thd->rpl_filter; #endif DBUG_ENTER("mysql_execute_command"); #ifdef WITH_PARTITION_STORAGE_ENGINE @@ -2652,6 +2654,11 @@ case SQLCOM_PREPARE: else delete mi; } + else + { + mi->rpl_filter= get_or_create_rpl_filter(lex_mi->connection_name.str, + lex_mi->connection_name.length); + } mysql_mutex_unlock(&LOCK_active_mi); break; diff --git a/sql/sql_priv.h b/sql/sql_priv.h index 7ff13bf06c3..9891cf1b24e 100644 --- a/sql/sql_priv.h +++ b/sql/sql_priv.h @@ -228,6 +228,7 @@ template <class T> bool valid_buffer_range(T jump, #define OPTIMIZER_SWITCH_TABLE_ELIMINATION (1ULL << 26) #define OPTIMIZER_SWITCH_EXTENDED_KEYS (1ULL << 27) #define OPTIMIZER_SWITCH_EXISTS_TO_IN (1ULL << 28) +#define OPTIMIZER_SWITCH_USE_CONDITION_SELECTIVITY (1ULL << 29) #define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \ OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \ diff --git a/sql/sql_select.cc b/sql/sql_select.cc index fa82a55a8c7..b2c4c76486a 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -87,12 +87,14 @@ void best_access_path(JOIN *join, JOIN_TAB *s, POSITION *pos, POSITION *loose_scan_pos); static void optimize_straight_join(JOIN *join, table_map join_tables); static bool greedy_search(JOIN *join, table_map remaining_tables, - uint depth, uint prune_level); + uint depth, uint prune_level, + uint use_cond_selectivity); static bool best_extension_by_limited_search(JOIN *join, table_map remaining_tables, uint idx, double record_count, double read_time, uint depth, - uint prune_level); + uint prune_level, + uint use_cond_selectivity); static uint determine_search_depth(JOIN* join); C_MODE_START static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2); @@ -132,7 +134,8 @@ static COND *build_equal_items(JOIN *join, COND *cond, COND_EQUAL *inherited, List<TABLE_LIST> *join_list, bool ignore_on_conds, - COND_EQUAL **cond_equal_ref); + COND_EQUAL **cond_equal_ref, + bool link_equal_fields= FALSE); static COND* substitute_for_best_equal_field(JOIN_TAB *context_tab, COND *cond, COND_EQUAL *cond_equal, @@ -148,8 +151,9 @@ static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list, static COND *optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list, bool ignore_on_conds, - Item::cond_result *cond_value, - COND_EQUAL **cond_equal); + Item::cond_result *cond_value, + COND_EQUAL **cond_equal, + int flags= 0); bool const_expression_in_where(COND *conds,Item *item, Item **comp_item); static int do_select(JOIN *join,List<Item> *fields,TABLE *tmp_table, Procedure *proc); @@ -274,6 +278,8 @@ enum enum_exec_or_opt {WALK_OPTIMIZATION_TABS , WALK_EXECUTION_TABS}; JOIN_TAB *first_breadth_first_tab(JOIN *join, enum enum_exec_or_opt tabs_kind); JOIN_TAB *next_breadth_first_tab(JOIN *join, enum enum_exec_or_opt tabs_kind, JOIN_TAB *tab); +static double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, + table_map rem_tables); #ifndef DBUG_OFF @@ -1153,7 +1159,7 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S DBUG_RETURN(1); conds= optimize_cond(this, conds, join_list, FALSE, - &cond_value, &cond_equal); + &cond_value, &cond_equal, OPT_LINK_EQUAL_FIELDS); if (thd->is_error()) { @@ -3351,6 +3357,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, table->pos_in_table_list= tables; error= tables->fetch_number_of_rows(); set_statistics_for_table(join->thd, table); + bitmap_clear_all(&table->cond_set); #ifdef WITH_PARTITION_STORAGE_ENGINE const bool no_partitions_used= table->no_partitions_used; @@ -3782,6 +3789,8 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, all select distinct fields participate in one index. */ add_group_and_distinct_keys(join, s); + + s->table->cond_selectivity= 1.0; /* Perform range analysis if there are keys it could use (1). @@ -3790,7 +3799,8 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, Don't do range analysis for materialized subqueries (4). Don't do range analysis for materialized derived tables (5) */ - if (!s->const_keys.is_clear_all() && // (1) + if ((!s->const_keys.is_clear_all() || + !bitmap_is_clear_all(&s->table->cond_set)) && // (1) (!s->table->pos_in_table_list->embedding || // (2) (s->table->pos_in_table_list->embedding && // (3) s->table->pos_in_table_list->embedding->sj_on_expr)) && // (3) @@ -3798,20 +3808,37 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, !(s->table->pos_in_table_list->derived && // (5) s->table->pos_in_table_list->is_materialized_derived())) // (5) { - ha_rows records; - SQL_SELECT *select; - select= make_select(s->table, found_const_table_map, - found_const_table_map, - *s->on_expr_ref ? *s->on_expr_ref : conds, - 1, &error); - if (!select) - goto error; - records= get_quick_record_count(join->thd, select, s->table, - &s->const_keys, join->row_limit); - s->quick=select->quick; - s->needed_reg=select->needed_reg; - select->quick=0; - if (records == 0 && s->table->reginfo.impossible_range) + bool impossible_range= FALSE; + ha_rows records= HA_POS_ERROR; + SQL_SELECT *select= 0; + if (!s->const_keys.is_clear_all()) + { + select= make_select(s->table, found_const_table_map, + found_const_table_map, + *s->on_expr_ref ? *s->on_expr_ref : conds, + 1, &error); + if (!select) + goto error; + records= get_quick_record_count(join->thd, select, s->table, + &s->const_keys, join->row_limit); + s->quick=select->quick; + s->needed_reg=select->needed_reg; + select->quick=0; + impossible_range= records == 0 && s->table->reginfo.impossible_range; + } + if (!impossible_range) + { + if (join->thd->variables.optimizer_use_condition_selectivity > 1) + calculate_cond_selectivity_for_table(join->thd, s->table, + *s->on_expr_ref ? + *s->on_expr_ref : conds); + if (s->table->reginfo.impossible_range) + { + impossible_range= TRUE; + records= 0; + } + } + if (impossible_range) { /* Impossible WHERE or ON expression @@ -3836,8 +3863,10 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, s->found_records=records; s->read_time= s->quick ? s->quick->read_time : 0.0; } - delete select; + if (select) + delete select; } + } if (pull_out_semijoin_tables(join)) @@ -4194,11 +4223,12 @@ add_key_field(JOIN *join, else if (!(field->flags & PART_KEY_FLAG)) { // Don't remove column IS NULL on a LEFT JOIN table - if (!eq_func || (*value)->type() != Item::NULL_ITEM || - !field->table->maybe_null || field->null_ptr) - return; // Not a key. Skip it - optimize= KEY_OPTIMIZE_EXISTS; - DBUG_ASSERT(num_values == 1); + if (eq_func && (*value)->type() == Item::NULL_ITEM && + field->table->maybe_null && !field->null_ptr) + { + optimize= KEY_OPTIMIZE_EXISTS; + DBUG_ASSERT(num_values == 1); + } } if (optimize != KEY_OPTIMIZE_EXISTS) { @@ -4247,7 +4277,11 @@ add_key_field(JOIN *join, break; } if (is_const) + { stat[0].const_keys.merge(possible_keys); + if (possible_keys.is_clear_all()) + bitmap_set_bit(&field->table->cond_set, field->field_index); + } else if (!eq_func) { /* @@ -5318,6 +5352,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) join->positions[idx].table= table; join->positions[idx].key=key; join->positions[idx].records_read=1.0; /* This is a const table */ + join->positions[idx].cond_selectivity= 1.0; join->positions[idx].ref_depend_map= 0; // join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */ @@ -6136,6 +6171,8 @@ choose_plan(JOIN *join, table_map join_tables) { uint search_depth= join->thd->variables.optimizer_search_depth; uint prune_level= join->thd->variables.optimizer_prune_level; + uint use_cond_selectivity= + join->thd->variables.optimizer_use_condition_selectivity; bool straight_join= test(join->select_options & SELECT_STRAIGHT_JOIN); DBUG_ENTER("choose_plan"); @@ -6200,7 +6237,8 @@ choose_plan(JOIN *join, table_map join_tables) if (search_depth == 0) /* Automatically determine a reasonable value for 'search_depth' */ search_depth= determine_search_depth(join); - if (greedy_search(join, join_tables, search_depth, prune_level)) + if (greedy_search(join, join_tables, search_depth, prune_level, + use_cond_selectivity)) DBUG_RETURN(TRUE); } } @@ -6474,6 +6512,8 @@ optimize_straight_join(JOIN *join, table_map join_tables) bool disable_jbuf= join->thd->variables.join_cache_level == 0; double record_count= 1.0; double read_time= 0.0; + uint use_cond_selectivity= + join->thd->variables.optimizer_use_condition_selectivity; POSITION loose_scan_pos; for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++) @@ -6490,6 +6530,11 @@ optimize_straight_join(JOIN *join, table_map join_tables) &loose_scan_pos); join_tables&= ~(s->table->map); + double pushdown_cond_selectivity= 1.0; + if (use_cond_selectivity > 1) + pushdown_cond_selectivity= table_cond_selectivity(join, idx, s, + join_tables); + join->positions[idx].cond_selectivity= pushdown_cond_selectivity; ++idx; } @@ -6577,6 +6622,8 @@ optimize_straight_join(JOIN *join, table_map join_tables) @param search_depth controlls the exhaustiveness of the search @param prune_level the pruning heuristics that should be applied during search + @param use_cond_selectivity specifies how the selectivity of the conditions + pushed to a table should be taken into account @retval FALSE ok @@ -6588,7 +6635,8 @@ static bool greedy_search(JOIN *join, table_map remaining_tables, uint search_depth, - uint prune_level) + uint prune_level, + uint use_cond_selectivity) { double record_count= 1.0; double read_time= 0.0; @@ -6613,7 +6661,8 @@ greedy_search(JOIN *join, /* Find the extension of the current QEP with the lowest cost */ join->best_read= DBL_MAX; if (best_extension_by_limited_search(join, remaining_tables, idx, record_count, - read_time, search_depth, prune_level)) + read_time, search_depth, prune_level, + use_cond_selectivity)) DBUG_RETURN(TRUE); /* 'best_read < DBL_MAX' means that optimizer managed to find @@ -6852,6 +6901,210 @@ double JOIN::get_examined_rows() } +static +double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, + table_map rem_tables, uint keyparts, + uint16 *ref_keyuse_steps) +{ + double sel= 1.0; + COND_EQUAL *cond_equal= join->cond_equal; + + if (!cond_equal || !cond_equal->current_level.elements) + return sel; + + if (!s->keyuse) + return sel; + + Item_equal *item_equal; + List_iterator_fast<Item_equal> it(cond_equal->current_level); + TABLE *table= s->table; + table_map table_bit= table->map; + POSITION *pos= &join->positions[idx]; + + while ((item_equal= it++)) + { + /* + Check whether we need to take into account the selectivity of + multiple equality item_equal. If this is the case multiply + the current value of sel by this selectivity + */ + table_map used_tables= item_equal->used_tables(); + if (!(used_tables & table_bit)) + continue; + if (item_equal->get_const()) + continue; + + Field *fld; + bool adjust_sel= FALSE; + Item_equal_fields_iterator fi(*item_equal); + while((fi++) && !adjust_sel) + { + Field *fld= fi.get_curr_field(); + if (fld->table->map != table_bit) + continue; + if (pos->key == 0) + adjust_sel= TRUE; + else + { + uint i; + KEYUSE *keyuse= pos->key; + uint key= keyuse->key; + + for (i= 0; i < keyparts; i++) + { + uint fldno; + if (is_hash_join_key_no(key)) + fldno= keyuse->keypart; + else + fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1; + if (fld->field_index == fldno) + break; + } + if (i == keyparts) + { + /* + Field fld is included in multiple equality item_equal + and is not a part of the ref key. + The selectivity of the multiple equality must be taken + into account unless one of the ref arguments is + equal to fld. + */ + adjust_sel= TRUE; + for (uint j= 0; j < keyparts && adjust_sel; j++) + { + if (j > 0) + keyuse+= ref_keyuse_steps[j-1]; + Item *ref_item= keyuse->val; + if (ref_item->real_item()->type() == Item::FIELD_ITEM) + { + Item_field *field_item= (Item_field *) (ref_item->real_item()); + if (item_equal->contains(field_item->field)) + adjust_sel= FALSE; + } + } + } + } + } + if (adjust_sel) + { + /* + If ref == 0 and there are no fields in the multiple equality + item_equal that belong to the tables joined prior to s + then the selectivity of multiple equality will be set to 1.0. + */ + double eq_fld_sel= 1.0; + fi.rewind(); + while ((fi++)) + { + double curr_eq_fld_sel; + fld= fi.get_curr_field(); + if (!fld->table->map & ~(table_bit | rem_tables)) + continue; + curr_eq_fld_sel= get_column_avg_frequency(fld) / + fld->table->stat_records(); + if (curr_eq_fld_sel < 1.0) + set_if_bigger(eq_fld_sel, curr_eq_fld_sel); + } + sel*= eq_fld_sel; + } + } + return sel; +} + +static +double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, + table_map rem_tables) +{ + uint16 ref_keyuse_steps[MAX_REF_PARTS - 1]; + Field *field; + TABLE *table= s->table; + MY_BITMAP *read_set= table->read_set; + double sel= s->table->cond_selectivity; + double table_records= table->stat_records(); + POSITION *pos= &join->positions[idx]; + uint keyparts= 0; + uint found_part_ref_or_null= 0; + + /* Discount the selectivity of the access method used to join table s */ + if (s->quick && s->quick->index != MAX_KEY) + { + if (pos->key == 0 && table_records > 0) + { + sel*= table->quick_rows[s->quick->index]/table_records; + } + } + else if (pos->key != 0) + { + /* A ref/ access or hash join is used to join table */ + KEYUSE *keyuse= pos->key; + KEYUSE *prev_ref_keyuse= keyuse; + uint key= keyuse->key; + do + { + if (!(keyuse->used_tables & (rem_tables | table->map))) + { + if (are_tables_local(s, keyuse->val->used_tables())) + { + if (is_hash_join_key_no(key)) + { + if (keyparts == keyuse->keypart) + keyparts++; + } + else + { + if (keyparts == keyuse->keypart && + !(~(keyuse->val->used_tables()) & pos->ref_depend_map) && + !(found_part_ref_or_null & keyuse->optimize)) + { + keyparts++; + found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ; + } + } + if (keyparts > keyuse->keypart) + { + uint fldno; + if (is_hash_join_key_no(key)) + fldno= keyuse->keypart; + else + fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1; + if (keyuse->val->const_item()) + sel*= table->field[fldno]->cond_selectivity; + if (keyparts > 1) + { + ref_keyuse_steps[keyparts-2]= keyuse - prev_ref_keyuse; + prev_ref_keyuse= keyuse; + } + } + } + } + keyuse++; + } while (keyuse->table == table && keyuse->key == key); + } + + for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++) + { + if (!bitmap_is_set(read_set, field->field_index) || + !field->next_equal_field) + continue; + for (Field *next_field= field->next_equal_field; + next_field != field; + next_field= next_field->next_equal_field) + { + if (!(next_field->table->map & rem_tables) && next_field->table != table) + { + sel/= field->cond_selectivity; + break; + } + } + } + + sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, + keyparts, ref_keyuse_steps); + + return sel; +} + + /** Find a good, possibly optimal, query execution plan (QEP) by a possibly exhaustive search. @@ -6962,6 +7215,8 @@ double JOIN::get_examined_rows() @param prune_level pruning heuristics that should be applied during optimization (values: 0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS) + @param use_cond_selectivity specifies how the selectivity of the conditions + pushed to a table should be taken into account @retval FALSE ok @@ -6976,7 +7231,8 @@ best_extension_by_limited_search(JOIN *join, double record_count, double read_time, uint search_depth, - uint prune_level) + uint prune_level, + uint use_cond_selectivity) { DBUG_ENTER("best_extension_by_limited_search"); @@ -7079,16 +7335,25 @@ best_extension_by_limited_search(JOIN *join, } } + double pushdown_cond_selectivity= 1.0; + if (use_cond_selectivity > 1) + pushdown_cond_selectivity= table_cond_selectivity(join, idx, s, + remaining_tables & + ~real_table_bit); + join->positions[idx].cond_selectivity= pushdown_cond_selectivity; + double partial_join_cardinality= current_record_count * + pushdown_cond_selectivity; if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables ) { /* Recursively expand the current partial plan */ swap_variables(JOIN_TAB*, join->best_ref[idx], *pos); if (best_extension_by_limited_search(join, remaining_tables & ~real_table_bit, idx + 1, - current_record_count, + partial_join_cardinality, current_read_time, search_depth - 1, - prune_level)) + prune_level, + use_cond_selectivity)) DBUG_RETURN(TRUE); swap_variables(JOIN_TAB*, join->best_ref[idx], *pos); } @@ -7106,7 +7371,7 @@ best_extension_by_limited_search(JOIN *join, { memcpy((uchar*) join->best_positions, (uchar*) join->positions, sizeof(POSITION) * (idx + 1)); - join->record_count= current_record_count; + join->record_count= partial_join_cardinality; join->best_read= current_read_time - 0.001; } DBUG_EXECUTE("opt", print_plan(join, idx+1, @@ -7750,8 +8015,8 @@ get_best_combination(JOIN *join) sub-order */ SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info; - j->records_read= sjm->is_sj_scan? sjm->rows : 1; - j->records= (ha_rows) j->records_read; + j->records= j->records_read= (ha_rows)(sjm->is_sj_scan? sjm->rows : 1); + j->cond_selectivity= 1.0; JOIN_TAB *jt; JOIN_TAB_RANGE *jt_range; if (!(jt= (JOIN_TAB*)join->thd->alloc(sizeof(JOIN_TAB)*sjm->tables)) || @@ -7814,7 +8079,8 @@ get_best_combination(JOIN *join) Save records_read in JOIN_TAB so that select_describe()/etc don't have to access join->best_positions[]. */ - j->records_read= join->best_positions[tablenr].records_read; + j->records_read= (ha_rows)join->best_positions[tablenr].records_read; + j->cond_selectivity= join->best_positions[tablenr].cond_selectivity; join->map2table[j->table->tablenr]= j; /* If we've reached the end of sjm nest, switch back to main sequence */ @@ -11792,7 +12058,8 @@ static bool check_equality(THD *thd, Item *item, COND_EQUAL *cond_equal, */ static COND *build_equal_items_for_cond(THD *thd, COND *cond, - COND_EQUAL *inherited) + COND_EQUAL *inherited, + bool link_item_fields) { Item_equal *item_equal; COND_EQUAL cond_equal; @@ -11839,6 +12106,7 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond, List_iterator_fast<Item_equal> it(cond_equal.current_level); while ((item_equal= it++)) { + item_equal->set_link_equal_fields(link_item_fields); item_equal->fix_fields(thd, NULL); item_equal->update_used_tables(); set_if_bigger(thd->lex->current_select->max_equal_elems, @@ -11858,7 +12126,8 @@ static COND *build_equal_items_for_cond(THD *thd, COND *cond, while ((item= li++)) { Item *new_item; - if ((new_item= build_equal_items_for_cond(thd, item, inherited)) != item) + if ((new_item= build_equal_items_for_cond(thd, item, inherited, FALSE)) + != item) { /* This replacement happens only for standalone equalities */ /* @@ -12021,14 +12290,15 @@ static COND *build_equal_items(JOIN *join, COND *cond, COND_EQUAL *inherited, List<TABLE_LIST> *join_list, bool ignore_on_conds, - COND_EQUAL **cond_equal_ref) + COND_EQUAL **cond_equal_ref, + bool link_equal_fields) { THD *thd= join->thd; COND_EQUAL *cond_equal= 0; if (cond) { - cond= build_equal_items_for_cond(thd, cond, inherited); + cond= build_equal_items_for_cond(thd, cond, inherited, link_equal_fields); cond->update_used_tables(); if (cond->type() == Item::COND_ITEM && ((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) @@ -13544,9 +13814,10 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, static COND * -optimize_cond(JOIN *join, COND *conds, +optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list, bool ignore_on_conds, - Item::cond_result *cond_value, COND_EQUAL **cond_equal) + Item::cond_result *cond_value, COND_EQUAL **cond_equal, + int flags) { THD *thd= join->thd; DBUG_ENTER("optimize_cond"); @@ -13569,9 +13840,10 @@ optimize_cond(JOIN *join, COND *conds, multiple equality contains a constant. */ DBUG_EXECUTE("where", print_where(conds, "original", QT_ORDINARY);); - conds= build_equal_items(join, conds, NULL, join_list, ignore_on_conds, - cond_equal); - DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY);); + conds= build_equal_items(join, conds, NULL, join_list, + ignore_on_conds, cond_equal, + test(flags & OPT_LINK_EQUAL_FIELDS)); + DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY);); /* change field = field to field = const for each found field = const */ propagate_cond_constants(thd, (I_List<COND_CMP> *) 0, conds, conds); @@ -14064,6 +14336,8 @@ Field *create_tmp_field_from_field(THD *thd, Field *org_field, ((Field_double *) new_field)->not_fixed= TRUE; new_field->vcol_info= 0; new_field->stored_in_db= TRUE; + new_field->cond_selectivity= 1.0; + new_field->next_equal_field= NULL; } return new_field; } @@ -14408,6 +14682,9 @@ void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps) bitmap_init(&table->eq_join_set, (my_bitmap_map*) (bitmaps+ 3*bitmap_buffer_size(field_count)), field_count, FALSE); + bitmap_init(&table->cond_set, + (my_bitmap_map*) (bitmaps+ 4*bitmap_buffer_size(field_count)), + field_count, FALSE); /* write_set and all_set are copies of read_set */ table->def_write_set= table->def_read_set; table->s->all_set= table->def_read_set; @@ -14571,7 +14848,7 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, &tmpname, (uint) strlen(path)+1, &group_buff, (group && ! using_unique_constraint ? param->group_length : 0), - &bitmaps, bitmap_buffer_size(field_count)*4, + &bitmaps, bitmap_buffer_size(field_count)*5, NullS)) { if (temp_pool_slot != MY_BIT_NONE) @@ -15326,7 +15603,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list) &share, sizeof(*share), &field, (field_count + 1) * sizeof(Field*), &blob_field, (field_count+1) *sizeof(uint), - &bitmaps, bitmap_buffer_size(field_count)*4, + &bitmaps, bitmap_buffer_size(field_count)*5, NullS)) return 0; @@ -22183,7 +22460,13 @@ int JOIN::print_explain(select_result_sink *result, uint8 explain_flags, { float f= 0.0; if (examined_rows) - f= (100.0 * (float)tab->records_read) / examined_rows; + { + double pushdown_cond_selectivity= tab->cond_selectivity; + if (pushdown_cond_selectivity == 1.0) + f= (float) (100.0 * tab->records_read / examined_rows); + else + f= (float) (100.0 * pushdown_cond_selectivity); + } set_if_smaller(f, 100.0); item_list.push_back(new Item_float(f, 2)); } diff --git a/sql/sql_select.h b/sql/sql_select.h index c748a4f5101..22880a251aa 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -283,6 +283,9 @@ typedef struct st_join_table { /* Copy of POSITION::records_read, set by get_best_combination() */ double records_read; + /* The selectivity of the conditions that can be pushed to the table */ + double cond_selectivity; + /* Startup cost for execution */ double startup_cost; @@ -763,6 +766,9 @@ typedef struct st_position :public Sql_alloc */ double records_read; + /* The selectivity of the pushed down conditions */ + double cond_selectivity; + /* Cost accessing the table in course of the entire complete join execution, i.e. cost of one access method use (e.g. 'range' or 'ref' scan ) times @@ -1811,6 +1817,8 @@ void eliminate_tables(JOIN *join); /* Index Condition Pushdown entry point function */ void push_index_cond(JOIN_TAB *tab, uint keyno); +#define OPT_LINK_EQUAL_FIELDS 1 + /**************************************************************************** Temporary table support for SQL Runtime ***************************************************************************/ diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index e34b4b21819..4df4b4d257d 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -26,6 +26,7 @@ #include "sql_base.h" #include "key.h" #include "sql_statistics.h" +#include "opt_range.h" #include "my_atomic.h" /* @@ -249,12 +250,13 @@ public: and column_name with column_name taken out of the only parameter f of the Field* type passed to this method. After this get_stat_values looks for a row by the set key value. If the row is found the values of statistical - data columns min_value, max_value, nulls_ratio, avg_length, avg_frequency - are read into internal structures. Values of nulls_ratio, avg_length, - avg_frequency are read into the corresponding fields of the read_stat - structure from the Field object f, while values from min_value and max_value - are copied into the min_value and max_value record buffers attached to the - TABLE structure for table t. + data columns min_value, max_value, nulls_ratio, avg_length, avg_frequency, + hist_size, hist_type, histogram are read into internal structures. Values + of nulls_ratio, avg_length, avg_frequency, hist_size, hist_type, histogram + are read into the corresponding fields of the read_stat structure from + the Field object f, while values from min_value and max_value are copied + into the min_value and max_value record buffers attached to the TABLE + structure for table t. If the value of a statistical column in the found row is null, then the corresponding flag in the f->read_stat.column_stat_nulls bitmap is set off. Otherwise the flag is set on. If no row is found for the column the all flags @@ -867,11 +869,12 @@ public: @details This implementation of a purely virtual method sets the value of the - columns 'min_value', 'max_value', 'nulls_ratio', 'avg_length' and - 'avg_frequency' of the stistical table columns_stat according to the - contents of the bitmap write_stat.column_stat_nulls and the values - of the fields min_value, max_value, nulls_ratio, avg_length and - avg_frequency of the structure write_stat from the Field structure + columns 'min_value', 'max_value', 'nulls_ratio', 'avg_length', + 'avg_frequency', 'hist_size', 'hist_type' and 'histogram' of the + stistical table columns_stat according to the contents of the bitmap + write_stat.column_stat_nulls and the values of the fields min_value, + max_value, nulls_ratio, avg_length, avg_frequency, hist_size, hist_type + and histogram of the structure write_stat from the Field structure for the field 'table_field'. The value of the k-th column in the table columns_stat is set to NULL if the k-th bit in the bitmap 'column_stat_nulls' is set to 1. @@ -888,7 +891,7 @@ public: char buff[MAX_FIELD_WIDTH]; String val(buff, sizeof(buff), &my_charset_utf8_bin); - for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_AVG_FREQUENCY; i++) + for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HISTOGRAM; i++) { Field *stat_field= stat_table->field[i]; if (table_field->collected_stats->is_null(i)) @@ -923,7 +926,21 @@ public: break; case COLUMN_STAT_AVG_FREQUENCY: stat_field->store(table_field->collected_stats->get_avg_frequency()); - break; + break; + case COLUMN_STAT_HIST_SIZE: + stat_field->store(table_field->collected_stats->histogram.get_size()); + break; + case COLUMN_STAT_HIST_TYPE: + stat_field->store(table_field->collected_stats->histogram.get_type() + + 1); + break; + case COLUMN_STAT_HISTOGRAM: + const char * col_histogram= + (const char *) (table_field->collected_stats->histogram.get_values()); + stat_field->store(col_histogram, + table_field->collected_stats->histogram.get_size(), + &my_charset_bin); + break; } } } @@ -936,14 +953,15 @@ public: @details This implementation of a purely virtual method first looks for a record - the statistical table column_stats by its primary key set the record + in the statistical table column_stats by its primary key set in the record buffer with the help of Column_stat::set_key_fields. Then, if the row is found, the function reads the values of the columns 'min_value', - 'max_value', 'nulls_ratio', 'avg_length' and 'avg_frequency' of the - table column_stat and sets accordingly the value of the bitmap - read_stat.column_stat_nulls' and the values of the fields min_value, - max_value, nulls_ratio, avg_length and avg_frequency of the structure - read_stat from the Field structure for the field 'table_field'. + 'max_value', 'nulls_ratio', 'avg_length', 'avg_frequency', 'hist_size' and + 'hist_type" of the table column_stat and sets accordingly the value of + the bitmap read_stat.column_stat_nulls' and the values of the fields + min_value, max_value, nulls_ratio, avg_length, avg_frequency, hist_size and + hist_type of the structure read_stat from the Field structure for the field + 'table_field'. */ void get_stat_values() @@ -960,7 +978,7 @@ public: char buff[MAX_FIELD_WIDTH]; String val(buff, sizeof(buff), &my_charset_utf8_bin); - for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_AVG_FREQUENCY; i++) + for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HIST_TYPE; i++) { Field *stat_field= stat_table->field[i]; @@ -992,6 +1010,14 @@ public: break; case COLUMN_STAT_AVG_FREQUENCY: table_field->read_stats->set_avg_frequency(stat_field->val_real()); + break; + case COLUMN_STAT_HIST_SIZE: + table_field->read_stats->histogram.set_size(stat_field->val_int()); + break; + case COLUMN_STAT_HIST_TYPE: + Histogram_type hist_type= (Histogram_type) (stat_field->val_int() - + 1); + table_field->read_stats->histogram.set_type(hist_type); break; } } @@ -999,6 +1025,37 @@ public: } } + + /** + @brief + Read histogram from of column_stats + + @details + This method first looks for a record in the statistical table column_stats + by its primary key set the record buffer with the help of + Column_stat::set_key_fields. Then, if the row is found, the function reads + the value of the column 'histogram' of the table column_stat and sets + accordingly the corresponding bit in the bitmap read_stat.column_stat_nulls. + The method assumes that the value of histogram size and the pointer to + the histogram location has been already set in the fields size and values + of read_stats->histogram. + */ + + void get_histogram_value() + { + if (find_stat()) + { + char buff[MAX_FIELD_WIDTH]; + String val(buff, sizeof(buff), &my_charset_utf8_bin); + uint fldno= COLUMN_STAT_HISTOGRAM; + Field *stat_field= stat_table->field[fldno]; + table_field->read_stats->set_not_null(fldno); + stat_field->val_str(&val); + memcpy(table_field->read_stats->histogram.get_values(), + val.ptr(), table_field->read_stats->histogram.get_size()); + } + } + }; @@ -1200,6 +1257,76 @@ public: }; +/* + Histogram_builder is a helper class that is used to build histograms + for columns +*/ + +class Histogram_builder +{ + Field *column; /* table field for which the histogram is built */ + uint col_length; /* size of this field */ + ha_rows records; /* number of records the histogram is built for */ + Field *min_value; /* pointer to the minimal value for the field */ + Field *max_value; /* pointer to the maximal value for the field */ + Histogram *histogram; /* the histogram location */ + uint hist_width; /* the number of points in the histogram */ + double bucket_capacity; /* number of rows in a bucket of the histogram */ + uint curr_bucket; /* number of the current bucket to be built */ + ulonglong count; /* number of values retrieved */ + ulonglong count_distinct; /* number of distinct values retrieved */ + +public: + Histogram_builder(Field *col, uint col_len, ha_rows rows) + : column(col), col_length(col_len), records(rows) + { + Column_statistics *col_stats= col->collected_stats; + min_value= col_stats->min_value; + max_value= col_stats->max_value; + histogram= &col_stats->histogram; + hist_width= histogram->get_width(); + bucket_capacity= (double) records / (hist_width + 1); + curr_bucket= 0; + count= 0; + count_distinct= 0; + } + + ulonglong get_count_distinct() { return count_distinct; } + + int next(void *elem, element_count elem_cnt) + { + count_distinct++; + count+= elem_cnt; + if (curr_bucket == hist_width) + return 0; + if (count > bucket_capacity * (curr_bucket + 1)) + { + column->store_field_value((uchar *) elem, col_length); + histogram->set_value(curr_bucket, + column->pos_in_interval(min_value, max_value)); + curr_bucket++; + while (curr_bucket != hist_width && + count > bucket_capacity * (curr_bucket + 1)) + { + histogram->set_prev_value(curr_bucket); + curr_bucket++; + } + } + return 0; + } +}; + + +C_MODE_START + +int histogram_build_walk(void *elem, element_count elem_cnt, void *arg) +{ + Histogram_builder *hist_builder= (Histogram_builder *) arg; + return hist_builder->next(elem, elem_cnt); +} + +C_MODE_END + /* The class Count_distinct_field is a helper class used to calculate @@ -1220,6 +1347,8 @@ protected: uint tree_key_length; /* The length of the keys for the elements of 'tree */ public: + + Count_distinct_field() {} /** @param @@ -1238,28 +1367,11 @@ public: Count_distinct_field(Field *field, uint max_heap_table_size) { - qsort_cmp2 compare_key; - void* cmp_arg; - enum enum_field_types f_type= field->type(); - table_field= field; tree_key_length= field->pack_length(); - if ((f_type == MYSQL_TYPE_VARCHAR) || - (!field->binary() && (f_type == MYSQL_TYPE_STRING || - f_type == MYSQL_TYPE_VAR_STRING))) - { - compare_key= (qsort_cmp2) simple_str_key_cmp; - cmp_arg= (void*) field; - } - else - { - cmp_arg= (void*) &tree_key_length; - compare_key= (qsort_cmp2) simple_raw_key_cmp; - } - - tree= new Unique(compare_key, cmp_arg, - tree_key_length, max_heap_table_size); + tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field, + tree_key_length, max_heap_table_size, 1); } virtual ~Count_distinct_field() @@ -1299,9 +1411,48 @@ public: tree->walk(table_field->table, count_distinct_walk, (void*) &count); return count; } + + /* + @brief + Build the histogram for the elements accumulated in the container of 'tree' + */ + ulonglong get_value_with_histogram(ha_rows rows) + { + Histogram_builder hist_builder(table_field, tree_key_length, rows); + tree->walk(table_field->table, histogram_build_walk, (void *) &hist_builder); + return hist_builder.get_count_distinct(); + } + + /* + @brief + Get the size of the histogram in bytes built for table_field + */ + uint get_hist_size() + { + return table_field->collected_stats->histogram.get_size(); + } + + /* + @brief + Get the pointer to the histogram built for table_field + */ + uchar *get_histogram() + { + return table_field->collected_stats->histogram.get_values(); + } + }; +static +int simple_ulonglong_key_cmp(void* arg, uchar* key1, uchar* key2) +{ + ulonglong *val1= (ulonglong *) key1; + ulonglong *val2= (ulonglong *) key2; + return *val1 > *val2 ? 1 : *val1 == *val2 ? 0 : -1; +} + + /* The class Count_distinct_field_bit is derived from the class Count_distinct_field to be used only for fields of the MYSQL_TYPE_BIT type. @@ -1311,8 +1462,17 @@ public: class Count_distinct_field_bit: public Count_distinct_field { public: + Count_distinct_field_bit(Field *field, uint max_heap_table_size) - :Count_distinct_field(field, max_heap_table_size) {} + { + table_field= field; + tree_key_length= sizeof(ulonglong); + + tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp, + (void*) &tree_key_length, + tree_key_length, max_heap_table_size, 1); + } + bool add() { longlong val= table_field->val_int(); @@ -1671,13 +1831,27 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) ulong *idx_avg_frequency= (ulong*) alloc_root(&table->mem_root, sizeof(ulong) * key_parts); - if (!table_stats || !column_stats || !index_stats || !idx_avg_frequency) + uint columns= 0; + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + if (bitmap_is_set(table->read_set, (*field_ptr)->field_index)) + columns++; + } + uint hist_size= thd->variables.histogram_size; + Histogram_type hist_type= (Histogram_type) (thd->variables.histogram_type); + uchar *histogram= NULL; + if (hist_size > 0) + histogram= (uchar *) alloc_root(&table->mem_root, hist_size * columns); + + if (!table_stats || !column_stats || !index_stats || !idx_avg_frequency || + (hist_size && !histogram)) DBUG_RETURN(1); table->collected_stats= table_stats; table_stats->column_stats= column_stats; table_stats->index_stats= index_stats; table_stats->idx_avg_frequency= idx_avg_frequency; + table_stats->histograms= histogram; memset(column_stats, 0, sizeof(Column_statistics) * (fields+1)); @@ -1686,6 +1860,13 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) (*field_ptr)->collected_stats= column_stats; (*field_ptr)->collected_stats->max_value= NULL; (*field_ptr)->collected_stats->min_value= NULL; + if (bitmap_is_set(table->read_set, (*field_ptr)->field_index)) + { + column_stats->histogram.set_size(hist_size); + column_stats->histogram.set_type(hist_type); + column_stats->histogram.set_values(histogram); + histogram+= hist_size; + } } memset(idx_avg_frequency, 0, sizeof(ulong) * key_parts); @@ -1790,7 +1971,6 @@ inline bool statistics_for_command_is_needed(THD *thd) @note Currently the function always is called with the parameter is_safe set to FALSE. - */ int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share, @@ -1902,12 +2082,83 @@ int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share, if (!is_safe) mysql_mutex_unlock(&table_share->LOCK_ha_data); - DBUG_RETURN(0); } /** + @brief + Allocate memory for the histogram used by a table share + + @param + thd Thread handler + @param + table_share Table share for which the memory for histogram data is allocated + @param + is_safe TRUE <-> at any time only one thread can perform the function + + @note + The function allocates the memory for the histogram built for a table in the + table's share memory with the intention to read the data there from the + system persistent statistical table mysql.column_stats, + The memory is allocated in the table_share's mem_root. + If the parameter is_safe is TRUE then it is guaranteed that at any given time + only one thread is executed the code of the function. + + @retval + 0 If the memory for all statistical data has been successfully allocated + @retval + 1 Otherwise + + @note + Currently the function always is called with the parameter is_safe set + to FALSE. +*/ + +static +int alloc_histograms_for_table_share(THD* thd, TABLE_SHARE *table_share, + bool is_safe) +{ + TABLE_STATISTICS_CB *stats_cb= &table_share->stats_cb; + + DBUG_ENTER("alloc_histograms_for_table_share"); + + if (!is_safe) + mysql_mutex_lock(&table_share->LOCK_ha_data); + + if (stats_cb->histograms_can_be_read) + { + if (!is_safe) + mysql_mutex_unlock(&table_share->LOCK_ha_data); + DBUG_RETURN(0); + } + + Table_statistics *table_stats= stats_cb->table_stats; + ulong total_hist_size= table_stats->total_hist_size; + + if (total_hist_size && !table_stats->histograms) + { + uchar *histograms= (uchar *) alloc_root(&stats_cb->mem_root, + total_hist_size); + if (!histograms) + { + if (!is_safe) + mysql_mutex_unlock(&table_share->LOCK_ha_data); + DBUG_RETURN(1); + } + memset(histograms, 0, total_hist_size); + table_stats->histograms= histograms; + stats_cb->histograms_can_be_read= TRUE; + } + + if (!is_safe) + mysql_mutex_unlock(&table_share->LOCK_ha_data); + + DBUG_RETURN(0); + +} + +/** @brief Initialize the aggregation fields to collect statistics on a column @@ -2005,14 +2256,29 @@ void Column_statistics_collected::finish(ha_rows rows) set_not_null(COLUMN_STAT_AVG_LENGTH); } if (count_distinct) - { - ulonglong distincts= count_distinct->get_value(); + { + ulonglong distincts; + uint hist_size= count_distinct->get_hist_size(); + if (hist_size == 0) + distincts= count_distinct->get_value(); + else + distincts= count_distinct->get_value_with_histogram(rows - nulls); if (distincts) { val= (double) (rows - nulls) / distincts; set_avg_frequency(val); set_not_null(COLUMN_STAT_AVG_FREQUENCY); } + else + hist_size= 0; + histogram.set_size(hist_size); + set_not_null(COLUMN_STAT_HIST_SIZE); + if (hist_size && distincts) + { + set_not_null(COLUMN_STAT_HIST_TYPE); + histogram.set_values(count_distinct->get_histogram()); + set_not_null(COLUMN_STAT_HISTOGRAM); + } delete count_distinct; count_distinct= NULL; } @@ -2233,16 +2499,19 @@ int collect_statistics_for_table(THD *thd, TABLE *table) table->collected_stats->cardinality= rows; } + bitmap_clear_all(table->write_set); for (field_ptr= table->field; *field_ptr; field_ptr++) { table_field= *field_ptr; if (!bitmap_is_set(table->read_set, table_field->field_index)) continue; + bitmap_set_bit(table->write_set, table_field->field_index); if (!rc) table_field->collected_stats->finish(rows); else table_field->collected_stats->cleanup(); } +bitmap_clear_all(table->write_set); if (!rc) { @@ -2420,6 +2689,7 @@ int read_statistics_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables) Field **field_ptr; KEY *key_info, *key_info_end; TABLE_SHARE *table_share= table->s; + Table_statistics *read_stats= table_share->stats_cb.table_stats; DBUG_ENTER("read_statistics_for_table"); @@ -2431,16 +2701,18 @@ int read_statistics_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables) /* Read statistics from the statistical table column_stats */ stat_table= stat_tables[COLUMN_STAT].table; + ulong total_hist_size= 0; Column_stat column_stat(stat_table, table); for (field_ptr= table_share->field; *field_ptr; field_ptr++) { table_field= *field_ptr; column_stat.set_key_fields(table_field); column_stat.get_stat_values(); + total_hist_size+= table_field->read_stats->histogram.get_size(); } + read_stats->total_hist_size= total_hist_size; /* Read statistics from the statistical table index_stats */ - Table_statistics *read_stats= table_share->stats_cb.table_stats; stat_table= stat_tables[INDEX_STAT].table; Index_stat index_stat(stat_table, table); for (key_info= table_share->key_info, @@ -2558,10 +2830,14 @@ bool statistics_for_tables_is_needed(THD *thd, TABLE_LIST *tables) TABLE_SHARE *table_share= tl->table->s; if (table_share && table_share->stats_cb.stats_can_be_read && - !table_share->stats_cb.stats_is_read) + (!table_share->stats_cb.stats_is_read || + (!table_share->stats_cb.histograms_are_read && + thd->variables.optimizer_use_condition_selectivity > 3))) return TRUE; if (table_share->stats_cb.stats_is_read) tl->table->stats_is_read= TRUE; + if (table_share->stats_cb.histograms_are_read) + tl->table->histograms_are_read= TRUE; } } @@ -2571,6 +2847,73 @@ bool statistics_for_tables_is_needed(THD *thd, TABLE_LIST *tables) /** @brief + Read histogram for a table from the persistent statistical tables + + @param + thd The thread handle + @param + table The table to read histograms for + @param + stat_tables The array of TABLE_LIST objects for statistical tables + + @details + For the statistical table columns_stats the function looks for the rows + from this table that contain statistical data on 'table'. If such rows + are found the histograms from them are read into the memory allocated + for histograms of 'table'. Later at the query processing these histogram + are supposed to be used by the optimizer. + The parameter stat_tables should point to an array of TABLE_LIST + objects for all statistical tables linked into a list. All statistical + tables are supposed to be opened. + The function is called by read_statistics_for_tables_if_needed(). + + @retval + 0 If data has been successfully read for the table + @retval + 1 Otherwise + + @note + Objects of the helper Column_stat are employed read histogram + from the statistical table column_stats now. +*/ + +static +int read_histograms_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables) +{ + TABLE_SHARE *table_share= table->s; + + DBUG_ENTER("read_histograms_for_table"); + + if (!table_share->stats_cb.histograms_can_be_read) + { + (void) alloc_histograms_for_table_share(thd, table_share, FALSE); + } + if (table_share->stats_cb.histograms_can_be_read && + !table_share->stats_cb.histograms_are_read) + { + Field **field_ptr; + uchar *histogram= table_share->stats_cb.table_stats->histograms; + TABLE *stat_table= stat_tables[COLUMN_STAT].table; + Column_stat column_stat(stat_table, table); + for (field_ptr= table_share->field; *field_ptr; field_ptr++) + { + Field *table_field= *field_ptr; + uint hist_size= table_field->read_stats->histogram.get_size(); + if (hist_size) + { + column_stat.set_key_fields(table_field); + table_field->read_stats->histogram.set_values(histogram); + column_stat.get_histogram_value(); + histogram+= hist_size; + } + } + } + + DBUG_RETURN(0); +} + +/** + @brief Read statistics for tables from a table list if it is needed @param @@ -2596,7 +2939,7 @@ int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables) TABLE_LIST stat_tables[STATISTICS_TABLES]; Open_tables_backup open_tables_backup; - DBUG_ENTER("read_statistics_for_table_if_needed"); + DBUG_ENTER("read_statistics_for_tables_if_needed"); DEBUG_SYNC(thd, "statistics_read_start"); @@ -2623,6 +2966,14 @@ int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables) } if (table_share->stats_cb.stats_is_read) tl->table->stats_is_read= TRUE; + if (thd->variables.optimizer_use_condition_selectivity > 3 && + table_share && !table_share->stats_cb.histograms_are_read) + { + (void) read_histograms_for_table(thd, tl->table, stat_tables); + table_share->stats_cb.histograms_are_read= TRUE; + } + if (table_share->stats_cb.stats_is_read) + tl->table->histograms_are_read= TRUE; } } @@ -3054,3 +3405,152 @@ void set_statistics_for_table(THD *thd, TABLE *table) key_info->read_stats->get_avg_frequency(0) > 0.5); } } + + +/** + @brief + Get the average frequency for a column + + @param + field The column whose average frequency is required + + @retval + The required average frequency +*/ + +double get_column_avg_frequency(Field * field) +{ + double res; + TABLE *table= field->table; + + /* + Statistics is shared by table instances and is accessed through + the table share. If table->s->field is not set for 'table', then + no column statistics is available for the table . + */ + if (!table->s->field) + { + res= table->stat_records(); + return res; + } + + Column_statistics *col_stats= table->s->field[field->field_index]->read_stats; + + if (!col_stats) + res= table->stat_records(); + else + res= col_stats->get_avg_frequency(); + return res; +} + + +/** + @brief + Estimate the number of rows in a column range using data from stat tables + + @param + field The column whose range cardinality is to be estimated + @param + min_endp The left end of the range whose cardinality is required + @param + max_endp The right end of the range whose cardinality is required + @param + range_flag The range flags + + @details + The function gets an estimate of the number of rows in a column range + using the statistical data from the table column_stats. + + @retval + The required estimate of the rows in the column range +*/ + +double get_column_range_cardinality(Field *field, + key_range *min_endp, + key_range *max_endp, + uint range_flag) +{ + double res; + TABLE *table= field->table; + Column_statistics *col_stats= table->field[field->field_index]->read_stats; + double tab_records= table->stat_records(); + + if (!col_stats) + return tab_records; + + double col_nulls= tab_records * col_stats->get_nulls_ratio(); + + double col_non_nulls= tab_records - col_nulls; + + bool nulls_incl= field->null_ptr && min_endp && min_endp->key[0] && + !(range_flag & NEAR_MIN); + + if (col_non_nulls < 1) + res= 0; + else if (min_endp && max_endp && min_endp->length == max_endp->length && + !memcmp(min_endp->key, max_endp->key, min_endp->length)) + { + if (nulls_incl) + { + /* This is null single point range */ + res= col_nulls; + } + else + { + double avg_frequency= col_stats->get_avg_frequency(); + res= avg_frequency; + if (avg_frequency > 1.0 + 0.000001 && + col_stats->min_value && col_stats->max_value) + { + Histogram *hist= &col_stats->histogram; + if (hist->is_available()) + { + double pos= field->pos_in_interval(col_stats->min_value, + col_stats->max_value); + res= col_non_nulls * + hist->point_selectivity(pos, + avg_frequency / col_non_nulls); + } + } + } + } + else + { + if (col_stats->min_value && col_stats->max_value) + { + double sel, min_mp_pos, max_mp_pos; + + if (min_endp && !min_endp->key[0]) + { + store_key_image_to_rec(field, (uchar *) min_endp->key, + min_endp->length); + min_mp_pos= field->pos_in_interval(col_stats->min_value, + col_stats->max_value); + } + else + min_mp_pos= 0.0; + if (max_endp) + { + store_key_image_to_rec(field, (uchar *) max_endp->key, + max_endp->length); + max_mp_pos= field->pos_in_interval(col_stats->min_value, + col_stats->max_value); + } + else + max_mp_pos= 1.0; + + Histogram *hist= &col_stats->histogram; + if (!hist->is_available()) + sel= (max_mp_pos - min_mp_pos); + else + sel= hist->range_selectivity(min_mp_pos, max_mp_pos); + res= col_non_nulls * sel; + set_if_bigger(res, col_stats->get_avg_frequency()); + } + else + res= col_non_nulls; + if (nulls_incl) + res+= col_nulls; + } + return res; +} diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 17f22cec4e5..c6a72478c34 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -16,15 +16,6 @@ #ifndef SQL_STATISTICS_H #define SQL_STATISTICS_H -/* - These enumeration types comprise the dictionary of three - statistical tables table_stat, column_stat and index_stat - as they defined in ../scripts/mysql_system_tables.sql. - - It would be nice if the declarations of these types were - generated automatically by the table definitions. -*/ - typedef enum enum_use_stat_tables_mode { @@ -33,6 +24,13 @@ enum enum_use_stat_tables_mode PEFERABLY, } Use_stat_tables_mode; +typedef +enum enum_histogram_type +{ + SINGLE_PREC_HB, + DOUBLE_PREC_HB +} Histogram_type; + enum enum_stat_tables { TABLE_STAT, @@ -40,6 +38,16 @@ enum enum_stat_tables INDEX_STAT, }; + +/* + These enumeration types comprise the dictionary of three + statistical tables table_stat, column_stat and index_stat + as they defined in ../scripts/mysql_system_tables.sql. + + It would be nice if the declarations of these types were + generated automatically by the table definitions. +*/ + enum enum_table_stat_col { TABLE_STAT_DB_NAME, @@ -56,7 +64,10 @@ enum enum_column_stat_col COLUMN_STAT_MAX_VALUE, COLUMN_STAT_NULLS_RATIO, COLUMN_STAT_AVG_LENGTH, - COLUMN_STAT_AVG_FREQUENCY + COLUMN_STAT_AVG_FREQUENCY, + COLUMN_STAT_HIST_SIZE, + COLUMN_STAT_HIST_TYPE, + COLUMN_STAT_HISTOGRAM }; enum enum_index_stat_col @@ -90,6 +101,160 @@ int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col, const char *new_name); void set_statistics_for_table(THD *thd, TABLE *table); +double get_column_avg_frequency(Field * field); + +double get_column_range_cardinality(Field *field, + key_range *min_endp, + key_range *max_endp, + uint range_flag); + +class Histogram +{ + +private: + Histogram_type type; + uint8 size; + uchar *values; + + uint prec_factor() + { + switch (type) { + case SINGLE_PREC_HB: + return ((uint) (1 << 8) - 1); + case DOUBLE_PREC_HB: + return ((uint) (1 << 16) - 1); + } + return 1; + } + +public: + uint get_width() + { + switch (type) { + case SINGLE_PREC_HB: + return size; + case DOUBLE_PREC_HB: + return size / 2; + } + return 0; + } + +private: + uint get_value(uint i) + { + switch (type) { + case SINGLE_PREC_HB: + return (uint) (((uint8 *) values)[i]); + case DOUBLE_PREC_HB: + return (uint) (((uint16 *) values)[i]); + } + return 0; + } + + uint find_bucket(double pos, bool first) + { + uint val= (uint) (pos * prec_factor()); + int lp= 0; + int rp= get_width() - 1; + int d= get_width() / 2; + uint i= lp + d; + for ( ; d; d= (rp - lp) / 2, i= lp + d) + { + if (val == get_value(i)) + break; + if (val < get_value(i)) + rp= i; + else if (val > get_value(i + 1)) + lp= i + 1; + else + break; + } + if (val == get_value(i)) + { + if (first) + { + while(i && val == get_value(i - 1)) + i--; + } + else + { + while(i + 1 < get_width() && val == get_value(i + 1)) + i++; + } + } + return i; + } + +public: + + uint get_size() { return (uint) size; } + + Histogram_type get_type() { return type; } + + uchar *get_values() { return (uchar *) values; } + + void set_size (ulonglong sz) { size= (uint8) sz; } + + void set_type (Histogram_type t) { type= t; } + + void set_values (uchar *vals) { values= (uchar *) vals; } + + bool is_available() { return get_size() > 0 && get_values(); } + + void set_value(uint i, double val) + { + switch (type) { + case SINGLE_PREC_HB: + ((uint8 *) values)[i]= (uint8) (val * prec_factor()); + return; + case DOUBLE_PREC_HB: + ((uint16 *) values)[i]= (uint16) (val * prec_factor()); + return; + } + } + + void set_prev_value(uint i) + { + switch (type) { + case SINGLE_PREC_HB: + ((uint8 *) values)[i]= ((uint8 *) values)[i-1]; + return; + case DOUBLE_PREC_HB: + ((uint16 *) values)[i]= ((uint16 *) values)[i-1]; + return; + } + } + + double range_selectivity(double min_pos, double max_pos) + { + double sel; + double bucket_sel= 1.0/(get_width() + 1); + uint min= find_bucket(min_pos, TRUE); + uint max= find_bucket(max_pos, FALSE); + sel= bucket_sel * (max - min + 1); + return sel; + } + + double point_selectivity(double pos, double avg_sel) + { + double sel; + double bucket_sel= 1.0/(get_width() + 1); + uint min= find_bucket(pos, TRUE); + uint max= min; + while (max + 1 < get_width() && get_value(max + 1) == get_value(max)) + max++; + double inv_prec_factor= (double) 1.0 / prec_factor(); + double width= (max + 1 == get_width() ? + 1.0 : get_value(max) * inv_prec_factor) - + (min == 0 ? + 0.0 : get_value(min-1) * inv_prec_factor); + sel= avg_sel * (bucket_sel * (max + 1 - min)) / width; + return sel; + } + +}; + + class Columns_statistics; class Index_statistics; @@ -105,8 +270,9 @@ public: uchar *min_max_record_buffers; /* Record buffers for min/max values */ Column_statistics *column_stats; /* Array of statistical data for columns */ Index_statistics *index_stats; /* Array of statistical data for indexes */ - ulong *idx_avg_frequency; /* Array of records per key for index prefixes */ - + ulong *idx_avg_frequency; /* Array of records per key for index prefixes */ + ulong total_hist_size; /* Total size of all histograms */ + uchar *histograms; /* Sequence of histograms */ }; @@ -161,10 +327,12 @@ private: public: + Histogram histogram; + void set_all_nulls() { column_stat_nulls= - ((1 << (COLUMN_STAT_AVG_FREQUENCY-COLUMN_STAT_COLUMN_NAME))-1) << + ((1 << (COLUMN_STAT_HISTOGRAM-COLUMN_STAT_COLUMN_NAME))-1) << (COLUMN_STAT_COLUMN_NAME+1); } diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 314f6781635..ae7f4fa7335 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -1663,6 +1663,25 @@ static Sys_var_ulong Sys_optimizer_prune_level( SESSION_VAR(optimizer_prune_level), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, 1), DEFAULT(1), BLOCK_SIZE(1)); +static Sys_var_ulong Sys_optimizer_use_condition_selectivity( + "optimizer_use_condition_selectivity", + "Controls selectivity of which conditions the optimizer takes into " + "account to calculate cardinality of a partial join when it searches " + "for the best execution plan " + "Meaning: " + "1 - use selectivity of index backed range conditions to calculate " + "the cardinality of a partial join if the last joined table is " + "accessed by full table scan or an index scan, " + "2 - use selectivity of index backed range conditions to calculate " + "the cardinality of a partial join in any case, " + "3 - additionally always use selectivity of range conditions that are " + "not backed by any index to calculate the cardinality of a partial join, " + "4 - use histograms to calculate selectivity of range conditions that " + "are not backed by any index to calculate the cardinality of " + "a partial join.", + SESSION_VAR(optimizer_use_condition_selectivity), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(1, 4), DEFAULT(1), BLOCK_SIZE(1)); + /** Warns about deprecated value 63 */ static bool fix_optimizer_search_depth(sys_var *self, THD *thd, enum_var_type type) @@ -3531,19 +3550,48 @@ static Sys_var_mybool Sys_relay_log_recovery( bool Sys_var_rpl_filter::global_update(THD *thd, set_var *var) { bool result= true; // Assume error + Master_info *mi; mysql_mutex_unlock(&LOCK_global_system_variables); mysql_mutex_lock(&LOCK_active_mi); - if (!master_info_index->give_error_if_slave_running()) - result= set_filter_value(var->save_result.string_value.str); + + if (!var->base.length) // no base name + { + mi= master_info_index-> + get_master_info(&thd->variables.default_master_connection, + MYSQL_ERROR::WARN_LEVEL_ERROR); + } + else // has base name + { + mi= master_info_index-> + get_master_info(&var->base, + MYSQL_ERROR::WARN_LEVEL_WARN); + } + + if (mi) + { + if (mi->rli.slave_running) + { + my_error(ER_SLAVE_MUST_STOP, MYF(0), + mi->connection_name.length, + mi->connection_name.str); + result= true; + } + else + { + result= set_filter_value(var->save_result.string_value.str, mi); + } + } + mysql_mutex_unlock(&LOCK_active_mi); mysql_mutex_lock(&LOCK_global_system_variables); return result; } -bool Sys_var_rpl_filter::set_filter_value(const char *value) +bool Sys_var_rpl_filter::set_filter_value(const char *value, Master_info *mi) { bool status= true; + Rpl_filter* rpl_filter= mi ? mi->rpl_filter : global_rpl_filter; switch (opt_id) { case OPT_REPLICATE_DO_DB: @@ -3573,7 +3621,32 @@ uchar *Sys_var_rpl_filter::global_value_ptr(THD *thd, LEX_STRING *base) { char buf[256]; String tmp(buf, sizeof(buf), &my_charset_bin); + uchar *ret; + Master_info *mi; + Rpl_filter *rpl_filter; + + mysql_mutex_unlock(&LOCK_global_system_variables); + mysql_mutex_lock(&LOCK_active_mi); + if (!base->length) // no base name + { + mi= master_info_index-> + get_master_info(&thd->variables.default_master_connection, + MYSQL_ERROR::WARN_LEVEL_ERROR); + } + else // has base name + { + mi= master_info_index-> + get_master_info(base, + MYSQL_ERROR::WARN_LEVEL_WARN); + } + mysql_mutex_lock(&LOCK_global_system_variables); + if (!mi) + { + mysql_mutex_unlock(&LOCK_active_mi); + return 0; + } + rpl_filter= mi->rpl_filter; tmp.length(0); switch (opt_id) { @@ -3597,7 +3670,10 @@ uchar *Sys_var_rpl_filter::global_value_ptr(THD *thd, LEX_STRING *base) break; } - return (uchar *) thd->strmake(tmp.ptr(), tmp.length()); + ret= (uchar *) thd->strmake(tmp.ptr(), tmp.length()); + mysql_mutex_unlock(&LOCK_active_mi); + + return ret; } static Sys_var_rpl_filter Sys_replicate_do_db( @@ -4055,6 +4131,24 @@ static Sys_var_enum Sys_optimizer_use_stat_tables( SESSION_VAR(use_stat_tables), CMD_LINE(REQUIRED_ARG), use_stat_tables_modes, DEFAULT(0)); +static Sys_var_ulong Sys_histogram_size( + "histogram_size", + "Number of bytes used for a histogram. " + "If set to 0, no histograms are created by ANALYZE.", + SESSION_VAR(histogram_size), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 255), DEFAULT(0), BLOCK_SIZE(1)); + +const char *histogram_types[] = + {"SINGLE_PREC_HB", "DOUBLE_PREC_HB", 0}; +static Sys_var_enum Sys_histogram_type( + "histogram_type", + "Specifies type of the histograms created by ANALYZE. " + "Possible values are: " + "SINGLE_PREC_HB - single precision height-balanced, " + "DOUBLE_PREC_HB - double precision height-balanced.", + SESSION_VAR(histogram_type), CMD_LINE(REQUIRED_ARG), + histogram_types, DEFAULT(0)); + static Sys_var_mybool Sys_no_thread_alarm( "debug_no_thread_alarm", "Disable system thread alarm calls. Disabling it may be useful " diff --git a/sql/sys_vars.h b/sql/sys_vars.h index 47cb5a327e1..4a38b41ab9b 100644 --- a/sql/sys_vars.h +++ b/sql/sys_vars.h @@ -28,6 +28,7 @@ #include "keycaches.h" #include "strfunc.h" #include "tztime.h" // my_tz_find, my_tz_SYSTEM, struct Time_zone +#include "rpl_mi.h" // For Multi-Source Replication /* a set of mostly trivial (as in f(X)=X) defines below to make system variable @@ -552,6 +553,7 @@ protected: } }; +class Master_info; class Sys_var_rpl_filter: public sys_var { private: @@ -563,7 +565,7 @@ public: NO_ARG, SHOW_CHAR, 0, NULL, VARIABLE_NOT_IN_BINLOG, NULL, NULL, NULL), opt_id(getopt_id) { - option.var_type= GET_STR; + option.var_type= GET_STR | GET_ASK_ADDR; } bool do_check(THD *thd, set_var *var) @@ -589,7 +591,7 @@ public: protected: uchar *global_value_ptr(THD *thd, LEX_STRING *base); - bool set_filter_value(const char *value); + bool set_filter_value(const char *value, Master_info *mi); }; /** diff --git a/sql/table.cc b/sql/table.cc index 8de3aea3d51..6ee21eb22d8 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -425,6 +425,8 @@ void TABLE_SHARE::destroy() free_root(&stats_cb.mem_root, MYF(0)); stats_cb.stats_can_be_read= FALSE; stats_cb.stats_is_read= FALSE; + stats_cb.histograms_can_be_read= FALSE; + stats_cb.histograms_are_read= FALSE; if (tmp_table == NO_TMP_TABLE) mysql_mutex_unlock(&LOCK_ha_data); @@ -2749,7 +2751,7 @@ partititon_err: /* Allocate bitmaps */ bitmap_size= share->column_bitmap_size; - if (!(bitmaps= (uchar*) alloc_root(&outparam->mem_root, bitmap_size*5))) + if (!(bitmaps= (uchar*) alloc_root(&outparam->mem_root, bitmap_size*6))) goto err; bitmap_init(&outparam->def_read_set, (my_bitmap_map*) bitmaps, share->fields, FALSE); @@ -2761,8 +2763,12 @@ partititon_err: (my_bitmap_map*) (bitmaps+bitmap_size*3), share->fields, FALSE); bitmap_init(&outparam->eq_join_set, (my_bitmap_map*) (bitmaps+bitmap_size*4), share->fields, FALSE); + bitmap_init(&outparam->cond_set, + (my_bitmap_map*) (bitmaps+bitmap_size*5), share->fields, FALSE); outparam->default_column_bitmaps(); + outparam->cond_selectivity= 1.0; + /* The table struct is now initialized; Open the table */ if (db_stat) { @@ -3884,6 +3890,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) file->ha_start_of_new_statement(); reginfo.impossible_range= 0; created= TRUE; + cond_selectivity= 1.0; /* Catch wrong handling of the auto_increment_field_not_null. */ DBUG_ASSERT(!auto_increment_field_not_null); @@ -3892,6 +3899,11 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) pos_in_table_list= tl; clear_column_bitmaps(); + for (Field **f_ptr= field ; *f_ptr ; f_ptr++) + { + (*f_ptr)->next_equal_field= NULL; + (*f_ptr)->cond_selectivity= 1.0; + } DBUG_ASSERT(key_read == 0); diff --git a/sql/table.h b/sql/table.h index 2841b854da1..e721d60f892 100644 --- a/sql/table.h +++ b/sql/table.h @@ -586,7 +586,9 @@ struct TABLE_STATISTICS_CB Table_statistics *table_stats; /* Structure to access the statistical data */ bool stats_can_be_read; /* Memory for statistical data is allocated */ bool stats_is_read; /* Statistical data for table has been read - from statistical tables */ + from statistical tables */ + bool histograms_can_be_read; + bool histograms_are_read; }; @@ -1107,6 +1109,7 @@ public: my_bitmap_map *bitmap_init_value; MY_BITMAP def_read_set, def_write_set, def_vcol_set, tmp_set; MY_BITMAP eq_join_set; /* used to mark equi-joined fields */ + MY_BITMAP cond_set; /* used to mark fields from sargable conditions*/ MY_BITMAP *read_set, *write_set, *vcol_set; /* Active column sets */ /* The ID of the query that opened and is using this table. Has different @@ -1159,6 +1162,8 @@ public: */ ha_rows quick_condition_rows; + double cond_selectivity; + table_map map; /* ID bit of table (1,2,4,8,16...) */ uint lock_position; /* Position in MYSQL_LOCK.table */ @@ -1278,6 +1283,7 @@ public: #endif uint max_keys; /* Size of allocated key_info array. */ bool stats_is_read; /* Persistent statistics is read for the table */ + bool histograms_are_read; MDL_ticket *mdl_ticket; void init(THD *thd, TABLE_LIST *tl); diff --git a/sql/uniques.cc b/sql/uniques.cc index 9fa06311ece..0c1c34d495b 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -86,6 +86,7 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, full_size= size; if (min_dupl_count_arg) full_size+= sizeof(element_count); + with_counters= test(min_dupl_count_arg); my_b_clear(&file); init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC)); @@ -428,6 +429,22 @@ static int buffpek_compare(void *arg, uchar *key_ptr1, uchar *key_ptr2) C_MODE_END +inline +element_count get_counter_from_merged_element(void *ptr, uint ofs) +{ + element_count cnt; + memcpy((uchar *) &cnt, (uchar *) ptr + ofs, sizeof(element_count)); + return cnt; +} + + +inline +void put_counter_into_merged_element(void *ptr, uint ofs, element_count cnt) +{ + memcpy((uchar *) ptr + ofs, (uchar *) &cnt, sizeof(element_count)); +} + + /* DESCRIPTION @@ -457,6 +474,8 @@ C_MODE_END file file with all trees dumped. Trees in the file must contain sorted unique values. Cache must be initialized in read mode. + with counters take into account counters for equal merged + elements RETURN VALUE 0 ok <> 0 error @@ -466,7 +485,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, uint key_length, BUFFPEK *begin, BUFFPEK *end, tree_walk_action walk_action, void *walk_action_arg, qsort_cmp2 compare, void *compare_arg, - IO_CACHE *file) + IO_CACHE *file, bool with_counters) { BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg }; QUEUE queue; @@ -485,6 +504,8 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, uint bytes_read; /* to hold return value of read_to_buffer */ BUFFPEK *top; int res= 1; + uint cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0); + element_count cnt; /* Invariant: queue must contain top element from each tree, until a tree is not completely walked through. @@ -543,9 +564,17 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, /* new top has been obtained; if old top is unique, apply the action */ if (compare(compare_arg, old_key, top->key)) { - if (walk_action(old_key, 1, walk_action_arg)) + cnt= with_counters ? + get_counter_from_merged_element(old_key, cnt_ofs) : 1; + if (walk_action(old_key, cnt, walk_action_arg)) goto end; } + else if (with_counters) + { + cnt= get_counter_from_merged_element(top->key, cnt_ofs); + cnt+= get_counter_from_merged_element(old_key, cnt_ofs); + put_counter_into_merged_element(top->key, cnt_ofs, cnt); + } } /* Applying walk_action to the tail of the last tree: this is safe because @@ -556,7 +585,10 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, { do { - if (walk_action(top->key, 1, walk_action_arg)) + + cnt= with_counters ? + get_counter_from_merged_element(top->key, cnt_ofs) : 1; + if (walk_action(top->key, cnt, walk_action_arg)) goto end; top->key+= key_length; } @@ -620,7 +652,7 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) (BUFFPEK *) file_ptrs.buffer, (BUFFPEK *) file_ptrs.buffer + file_ptrs.elements, action, walk_action_arg, - tree.compare, tree.custom_arg, &file); + tree.compare, tree.custom_arg, &file, with_counters); } my_free(merge_buffer); return res; |