diff options
author | Igor Babaev <igor@askmonty.org> | 2012-12-13 23:05:12 -0800 |
---|---|---|
committer | Igor Babaev <igor@askmonty.org> | 2012-12-13 23:05:12 -0800 |
commit | a06224bd1594ea1da650f748a8956922eafd2363 (patch) | |
tree | 99fc4c066ce8e3b9c0037333c62b993a787458d8 /sql | |
parent | 65820439bdafeead66496b489c076012c334c710 (diff) | |
download | mariadb-git-a06224bd1594ea1da650f748a8956922eafd2363.tar.gz |
Addressed all remaining issues from the review of the patch
that introduced engine-independent persistent statistics.
In particular:
- added an enumeration type for possible values of the system
variable use_stat_tables
- renamed KEY::real_rec_per_key to KEY::actual_rec_per_key
- optimized the collection of statistical data for any primary
key defined on only one column.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/multi_range_read.cc | 2 | ||||
-rw-r--r-- | sql/opt_range.cc | 8 | ||||
-rw-r--r-- | sql/sql_admin.cc | 3 | ||||
-rw-r--r-- | sql/sql_base.cc | 5 | ||||
-rw-r--r-- | sql/sql_base.h | 16 | ||||
-rw-r--r-- | sql/sql_db.cc | 1 | ||||
-rw-r--r-- | sql/sql_delete.cc | 1 | ||||
-rw-r--r-- | sql/sql_join_cache.cc | 3 | ||||
-rw-r--r-- | sql/sql_rename.cc | 1 | ||||
-rw-r--r-- | sql/sql_select.cc | 18 | ||||
-rw-r--r-- | sql/sql_show.cc | 3 | ||||
-rw-r--r-- | sql/sql_statistics.cc | 62 | ||||
-rw-r--r-- | sql/sql_statistics.h | 29 | ||||
-rw-r--r-- | sql/sql_table.cc | 1 | ||||
-rw-r--r-- | sql/structs.h | 2 | ||||
-rw-r--r-- | sql/table.cc | 2 |
16 files changed, 112 insertions, 45 deletions
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index 1361d5822c9..b9f49a83b4b 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -1201,7 +1201,7 @@ bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf, uint parts= my_count_bits(key_tuple_map); ulong rpc; ulonglong rowids_size= rowid_buf_elem_size; - if ((rpc= key_info->real_rec_per_key(parts - 1))) + if ((rpc= key_info->actual_rec_per_key(parts - 1))) rowids_size= rowid_buf_elem_size * rpc; double fraction_for_rowids= diff --git a/sql/opt_range.cc b/sql/opt_range.cc index ab4e696c21c..6f13816d8ea 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -5502,8 +5502,8 @@ ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, ha_rows ext_records= ext_index_scan->records; if (i < used_key_parts) { - ulong f1= key_info->real_rec_per_key(i-1); - ulong f2= key_info->real_rec_per_key(i); + ulong f1= key_info->actual_rec_per_key(i-1); + ulong f2= key_info->actual_rec_per_key(i); ext_records= (ha_rows) ((double) ext_records / f2 * f1); } if (ext_records < table_cardinality) @@ -12642,7 +12642,7 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, num_blocks= (uint)(table_records / keys_per_block) + 1; /* Compute the number of keys in a group. */ - keys_per_group= index_info->real_rec_per_key(group_key_parts - 1); + keys_per_group= index_info->actual_rec_per_key(group_key_parts - 1); if (keys_per_group == 0) /* If there is no statistics try to guess */ /* each group contains 10% of all records */ keys_per_group= (uint)(table_records / 10) + 1; @@ -12662,7 +12662,7 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, Compute the probability that two ends of a subgroup are inside different blocks. 
*/ - keys_per_subgroup= index_info->real_rec_per_key(used_key_parts - 1); + keys_per_subgroup= index_info->actual_rec_per_key(used_key_parts - 1); if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */ p_overlap= 1.0; /* a block, it will overlap at least two blocks. */ else diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index f2124dd3bb8..e6bbef482a7 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -29,6 +29,7 @@ #include "sql_parse.h" // check_table_access #include "strfunc.h" #include "sql_admin.h" +#include "sql_statistics.h" /* Prepare, run and cleanup for mysql_recreate_table() */ @@ -718,7 +719,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, if (compl_result_code == HA_ADMIN_OK && operator_func == &handler::ha_analyze && table->table->s->table_category == TABLE_CATEGORY_USER && - (thd->variables.use_stat_tables > 0 || + (get_use_stat_tables_mode(thd) > NEVER || lex->with_persistent_for_clause)) { if (!(compl_result_code= diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 9db8ac1c732..a582dde4277 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -49,6 +49,7 @@ #include "sql_trigger.h" #include "transaction.h" #include "sql_prepare.h" +#include "sql_statistics.h" #include <m_ctype.h> #include <my_dir.h> #include <hash.h> @@ -3142,7 +3143,7 @@ retry_share: while (table_cache_count > table_cache_size && unused_tables) free_cache_entry(unused_tables); - if (thd->variables.use_stat_tables > 0) + if (get_use_stat_tables_mode(thd) > NEVER) { if (share->table_category != TABLE_CATEGORY_SYSTEM) { @@ -4634,7 +4635,7 @@ open_and_process_table(THD *thd, LEX *lex, TABLE_LIST *tables, goto end; } - if (thd->variables.use_stat_tables > 0 && tables->table) + if (get_use_stat_tables_mode(thd) > NEVER && tables->table) { TABLE_SHARE *table_share= tables->table->s; if (table_share && table_share->table_category != TABLE_CATEGORY_SYSTEM) diff --git a/sql/sql_base.h b/sql/sql_base.h index c4cd7f467a0..aa2ba9e5680 100644 --- 
a/sql/sql_base.h +++ b/sql/sql_base.h @@ -310,22 +310,6 @@ int dynamic_column_error_message(enum_dyncol_func_result rc); /* open_and_lock_tables with optional derived handling */ int open_and_lock_tables_derived(THD *thd, TABLE_LIST *tables, bool derived); -int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables); -int collect_statistics_for_table(THD *thd, TABLE *table); -int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *share, - bool is_safe); -int alloc_statistics_for_table(THD *thd, TABLE *table); -int update_statistics_for_table(THD *thd, TABLE *table); -int delete_statistics_for_table(THD *thd, LEX_STRING *db, LEX_STRING *tab); -int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col); -int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info, - bool ext_prefixes_only); -int rename_table_in_stat_tables(THD *thd, LEX_STRING *db, LEX_STRING *tab, - LEX_STRING *new_db, LEX_STRING *new_tab); -int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col, - const char *new_name); -void set_statistics_for_table(THD *thd, TABLE *table); - extern "C" int simple_raw_key_cmp(void* arg, const void* key1, const void* key2); extern "C" int count_distinct_walk(void *elem, element_count count, void *arg); diff --git a/sql/sql_db.cc b/sql/sql_db.cc index 086445948bd..f4e9ccfc5e6 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -36,6 +36,7 @@ #include "sp.h" #include "events.h" #include "sql_handler.h" +#include "sql_statistics.h" #include <my_dir.h> #include <m_ctype.h> #include "log.h" diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 2dd090578a7..4e35072d508 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -35,6 +35,7 @@ #include "sql_select.h" #include "sp_head.h" #include "sql_trigger.h" +#include "sql_statistics.h" #include "transaction.h" #include "records.h" // init_read_record, #include "sql_derived.h" // mysql_handle_list_of_derived diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index 
225bb413195..ac96746c389 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -3812,7 +3812,8 @@ uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(ulong recno) uint incr= 0; TABLE_REF *ref= &join_tab->ref; TABLE *tab= join_tab->table; - uint rec_per_key= tab->key_info[ref->key].real_rec_per_key(ref->key_parts-1); + uint rec_per_key= + tab->key_info[ref->key].actual_rec_per_key(ref->key_parts-1); set_if_bigger(rec_per_key, 1); if (recno == 1) incr= ref->key_length + tab->file->ref_length; diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc index d27989c3b01..c91623cee6e 100644 --- a/sql/sql_rename.cc +++ b/sql/sql_rename.cc @@ -28,6 +28,7 @@ #include "lock.h" // MYSQL_OPEN_SKIP_TEMPORARY #include "sql_base.h" // tdc_remove_table, lock_table_names, #include "sql_handler.h" // mysql_ha_rm_tables +#include "sql_statistics.h" #include "datadict.h" static TABLE_LIST *rename_tables(THD *thd, TABLE_LIST *table_list, diff --git a/sql/sql_select.cc b/sql/sql_select.cc index fcb650b4757..85908623324 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -5416,7 +5416,7 @@ best_access_path(JOIN *join, else { uint key_parts= table->actual_n_key_parts(keyinfo); - if (!(records= keyinfo->real_rec_per_key(key_parts-1))) + if (!(records= keyinfo->actual_rec_per_key(key_parts-1))) { /* Prefer longer keys */ records= ((double) s->records / (double) rec * @@ -5516,7 +5516,7 @@ best_access_path(JOIN *join, else { /* Check if we have statistic about the distribution */ - if ((records= keyinfo->real_rec_per_key(max_key_part-1))) + if ((records= keyinfo->actual_rec_per_key(max_key_part-1))) { /* Fix for the case where the index statistics is too @@ -22974,7 +22974,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, if (used_key_parts > used_index_parts) used_pk_parts= used_key_parts-used_index_parts; rec_per_key= used_key_parts ? 
- keyinfo->real_rec_per_key(used_key_parts-1) : 1; + keyinfo->actual_rec_per_key(used_key_parts-1) : 1; /* Take into account the selectivity of the used pk prefix */ if (used_pk_parts) { @@ -22989,8 +22989,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, rec_per_key= 1; if (rec_per_key > 1) { - rec_per_key*= pkinfo->real_rec_per_key(used_pk_parts-1); - rec_per_key/= pkinfo->real_rec_per_key(0); + rec_per_key*= pkinfo->actual_rec_per_key(used_pk_parts-1); + rec_per_key/= pkinfo->actual_rec_per_key(0); /* The value of rec_per_key for the extended key has to be adjusted accordingly if some components of @@ -23004,9 +23004,9 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, We presume here that for any index rec_per_key[i] != 0 if rec_per_key[0] != 0. */ - DBUG_ASSERT(pkinfo->real_rec_per_key(i)); - rec_per_key*= pkinfo->real_rec_per_key(i-1); - rec_per_key/= pkinfo->real_rec_per_key(i); + DBUG_ASSERT(pkinfo->actual_rec_per_key(i)); + rec_per_key*= pkinfo->actual_rec_per_key(i-1); + rec_per_key/= pkinfo->actual_rec_per_key(i); } } } @@ -23051,7 +23051,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, select_limit= (ha_rows) (select_limit * (double) table_records / table->quick_condition_rows); - rec_per_key= keyinfo->real_rec_per_key(keyinfo->key_parts-1); + rec_per_key= keyinfo->actual_rec_per_key(keyinfo->key_parts-1); set_if_bigger(rec_per_key, 1); /* Here we take into account the fact that rows are diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 19a25c28942..861509b30de 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -45,6 +45,7 @@ #include "set_var.h" #include "sql_trigger.h" #include "sql_derived.h" +#include "sql_statistics.h" #include "sql_connect.h" #include "authors.h" #include "contributors.h" @@ -5765,7 +5766,7 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables, if (key->rec_per_key[j]) { ha_rows records=((double) show_table->stat_records() / - 
key->real_rec_per_key(j)); + key->actual_rec_per_key(j)); table->field[9]->store((longlong) records, TRUE); table->field[9]->set_notnull(); } diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 93d3a1c581f..1cb4398caa2 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -206,6 +206,8 @@ private: Count_distinct_field *count_distinct; /* The container for distinct column values */ + bool is_single_pk_col; /* TRUE <-> the only column of the primary key */ + public: inline void init(THD *thd, Field * table_field); @@ -1399,6 +1401,8 @@ private: public: + bool is_single_comp_pk; + Index_prefix_calc(TABLE *table, KEY *key_info) : index_table(table), index_info(key_info) { @@ -1407,6 +1411,16 @@ public: uint key_parts= table->actual_n_key_parts(key_info); empty= TRUE; prefixes= 0; + + is_single_comp_pk= FALSE; + uint pk= table->s->primary_key; + if (table->key_info - key_info == pk && table->key_info[pk].key_parts == 1) + { + prefixes= 1; + is_single_comp_pk= TRUE; + return; + } + if ((calc_state= (Prefix_calc_state *) sql_alloc(sizeof(Prefix_calc_state)*key_parts))) { @@ -1430,6 +1444,7 @@ public: } } + /** @breif Change the elements of calc_state after reading the next index entry @@ -1487,6 +1502,13 @@ public: { uint i; Prefix_calc_state *state; + + if (is_single_comp_pk) + { + index_info->collected_stats->set_avg_frequency(0, 1.0); + return; + } + for (i= 0, state= calc_state; i < prefixes; i++, state++) { if (i < prefixes) @@ -1658,7 +1680,7 @@ void create_min_max_stistical_fields_for_table_share(THD *thd, int alloc_statistics_for_table(THD* thd, TABLE *table) { Field **field_ptr; - uint cnt= 0; + uint fields; DBUG_ENTER("alloc_statistics_for_table"); @@ -1666,10 +1688,11 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) (Table_statistics *) alloc_root(&table->mem_root, sizeof(Table_statistics)); - for (field_ptr= table->field; *field_ptr; field_ptr++, cnt++) ; + fields= table->s->fields ; Column_statistics_collected *column_stats= 
(Column_statistics_collected *) alloc_root(&table->mem_root, - sizeof(Column_statistics_collected) * cnt); + sizeof(Column_statistics_collected) * + fields); uint keys= table->s->keys; Index_statistics *index_stats= @@ -1688,7 +1711,7 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) table_stats->index_stats= index_stats; table_stats->idx_avg_frequency= idx_avg_frequency; - memset(column_stats, 0, sizeof(Column_statistics) * cnt); + memset(column_stats, 0, sizeof(Column_statistics) * fields); for (field_ptr= table->field; *field_ptr; field_ptr++, column_stats++) (*field_ptr)->collected_stats= column_stats; @@ -1838,13 +1861,23 @@ inline void Column_statistics_collected::init(THD *thd, Field *table_field) { uint max_heap_table_size= thd->variables.max_heap_table_size; + TABLE *table= table_field->table; + uint pk= table->s->primary_key; + + is_single_pk_col= FALSE; + if (pk != MAX_KEY && table->key_info[pk].key_parts == 1 && + table->key_info[pk].key_part[0].fieldnr == table_field->field_index + 1) + is_single_pk_col= TRUE; + column= table_field; set_all_nulls(); nulls= 0; column_total_length= 0; + if (is_single_pk_col) + count_distinct= NULL; if (table_field->flags & BLOB_FLAG) count_distinct= NULL; else @@ -1923,6 +1956,12 @@ void Column_statistics_collected::finish(ha_rows rows) delete count_distinct; count_distinct= NULL; } + else if (is_single_pk_col) + { + val= 1.0; + set_avg_frequency(val); + set_not_null(COLUMN_STAT_AVG_FREQUENCY); + } } @@ -1986,6 +2025,12 @@ int collect_statistics_for_index(THD *thd, TABLE *table, uint index) DEBUG_SYNC(table->in_use, "statistics_collection_start1"); DEBUG_SYNC(table->in_use, "statistics_collection_start2"); + if (index_prefix_calc.is_single_comp_pk) + { + index_prefix_calc.get_avg_frequency(); + DBUG_RETURN(rc); + } + table->key_read= 1; table->file->extra(HA_EXTRA_KEYREAD); @@ -2078,7 +2123,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table) table->collected_stats->cardinality_is_null= TRUE; 
table->collected_stats->cardinality= 0; - + for (field_ptr= table->field; *field_ptr; field_ptr++) { table_field= *field_ptr; @@ -2949,9 +2994,9 @@ int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col, void set_statistics_for_table(THD *thd, TABLE *table) { - uint use_stat_table_mode= thd->variables.use_stat_tables; + Use_stat_tables_mode use_stat_table_mode= get_use_stat_tables_mode(thd); table->used_stat_records= - (use_stat_table_mode <= 1 || + (use_stat_table_mode <= COMPLEMENTARY || !table->s->stats_is_read || !table->s->read_stats || table->s->read_stats->cardinality_is_null) ? table->file->stats.records : table->s->read_stats->cardinality; @@ -2960,7 +3005,8 @@ void set_statistics_for_table(THD *thd, TABLE *table) key_info < key_info_end; key_info++) { key_info->is_statistics_from_stat_tables= - (use_stat_table_mode > 1 && table->s->stats_is_read && + (use_stat_table_mode > COMPLEMENTARY && + table->s->stats_is_read && key_info->read_stats && key_info->read_stats->avg_frequency_is_inited() && key_info->read_stats->get_avg_frequency(0) > 0.5); diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 14a16170c3b..e24e4beae74 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -25,6 +25,14 @@ generated automatically by the table definitions. 
*/ +typedef +enum enum_use_stat_tables_mode +{ + NEVER, + COMPLEMENTARY, + PEFERABLY, +} Use_stat_tables_mode; + enum enum_stat_tables { TABLE_STAT, @@ -60,6 +68,27 @@ enum enum_index_stat_col INDEX_STAT_AVG_FREQUENCY }; +inline +Use_stat_tables_mode get_use_stat_tables_mode(THD *thd) +{ + return (Use_stat_tables_mode) (thd->variables.use_stat_tables); +} + +int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables); +int collect_statistics_for_table(THD *thd, TABLE *table); +int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *share, + bool is_safe); +int alloc_statistics_for_table(THD *thd, TABLE *table); +int update_statistics_for_table(THD *thd, TABLE *table); +int delete_statistics_for_table(THD *thd, LEX_STRING *db, LEX_STRING *tab); +int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col); +int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info, + bool ext_prefixes_only); +int rename_table_in_stat_tables(THD *thd, LEX_STRING *db, LEX_STRING *tab, + LEX_STRING *new_db, LEX_STRING *new_tab); +int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col, + const char *new_name); +void set_statistics_for_table(THD *thd, TABLE *table); class Columns_statistics; class Index_statistics; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index b63c1859582..5bd2b105a92 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -43,6 +43,7 @@ #include "discover.h" // readfrm #include "my_pthread.h" // pthread_mutex_t #include "log_event.h" // Query_log_event +#include "sql_statistics.h" #include <hash.h> #include <myisam.h> #include <my_dir.h> diff --git a/sql/structs.h b/sql/structs.h index 13bb0574b24..a3a54c524e6 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -142,7 +142,7 @@ typedef struct st_key { engine_option_value *option_list; ha_index_option_struct *option_struct; /* structure with parsed options */ - double real_rec_per_key(uint i); + double actual_rec_per_key(uint i); } KEY; diff --git a/sql/table.cc 
b/sql/table.cc index 11a8c553566..3ce0074a86f 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -6783,7 +6783,7 @@ uint TABLE_SHARE::actual_n_key_parts(THD *thd) } -double KEY::real_rec_per_key(uint i) +double KEY::actual_rec_per_key(uint i) { if (rec_per_key == 0) return 0; |