summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorIgor Babaev <igor@askmonty.org>2012-12-13 23:05:12 -0800
committerIgor Babaev <igor@askmonty.org>2012-12-13 23:05:12 -0800
commita06224bd1594ea1da650f748a8956922eafd2363 (patch)
tree99fc4c066ce8e3b9c0037333c62b993a787458d8 /sql
parent65820439bdafeead66496b489c076012c334c710 (diff)
downloadmariadb-git-a06224bd1594ea1da650f748a8956922eafd2363.tar.gz
Addressed all remaining issues from the review of the patch
that introduced engine-independent persistent statistics. In particular:
- added an enumeration type for the possible values of the system variable use_stat_tables
- renamed KEY::real_rec_per_key to KEY::actual_rec_per_key
- optimized the collection of statistical data for any primary key defined on only one column.
Diffstat (limited to 'sql')
-rw-r--r--sql/multi_range_read.cc2
-rw-r--r--sql/opt_range.cc8
-rw-r--r--sql/sql_admin.cc3
-rw-r--r--sql/sql_base.cc5
-rw-r--r--sql/sql_base.h16
-rw-r--r--sql/sql_db.cc1
-rw-r--r--sql/sql_delete.cc1
-rw-r--r--sql/sql_join_cache.cc3
-rw-r--r--sql/sql_rename.cc1
-rw-r--r--sql/sql_select.cc18
-rw-r--r--sql/sql_show.cc3
-rw-r--r--sql/sql_statistics.cc62
-rw-r--r--sql/sql_statistics.h29
-rw-r--r--sql/sql_table.cc1
-rw-r--r--sql/structs.h2
-rw-r--r--sql/table.cc2
16 files changed, 112 insertions, 45 deletions
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index 1361d5822c9..b9f49a83b4b 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -1201,7 +1201,7 @@ bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf,
uint parts= my_count_bits(key_tuple_map);
ulong rpc;
ulonglong rowids_size= rowid_buf_elem_size;
- if ((rpc= key_info->real_rec_per_key(parts - 1)))
+ if ((rpc= key_info->actual_rec_per_key(parts - 1)))
rowids_size= rowid_buf_elem_size * rpc;
double fraction_for_rowids=
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index ab4e696c21c..6f13816d8ea 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -5502,8 +5502,8 @@ ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
ha_rows ext_records= ext_index_scan->records;
if (i < used_key_parts)
{
- ulong f1= key_info->real_rec_per_key(i-1);
- ulong f2= key_info->real_rec_per_key(i);
+ ulong f1= key_info->actual_rec_per_key(i-1);
+ ulong f2= key_info->actual_rec_per_key(i);
ext_records= (ha_rows) ((double) ext_records / f2 * f1);
}
if (ext_records < table_cardinality)
@@ -12642,7 +12642,7 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
num_blocks= (uint)(table_records / keys_per_block) + 1;
/* Compute the number of keys in a group. */
- keys_per_group= index_info->real_rec_per_key(group_key_parts - 1);
+ keys_per_group= index_info->actual_rec_per_key(group_key_parts - 1);
if (keys_per_group == 0) /* If there is no statistics try to guess */
/* each group contains 10% of all records */
keys_per_group= (uint)(table_records / 10) + 1;
@@ -12662,7 +12662,7 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
Compute the probability that two ends of a subgroup are inside
different blocks.
*/
- keys_per_subgroup= index_info->real_rec_per_key(used_key_parts - 1);
+ keys_per_subgroup= index_info->actual_rec_per_key(used_key_parts - 1);
if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
p_overlap= 1.0; /* a block, it will overlap at least two blocks. */
else
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
index f2124dd3bb8..e6bbef482a7 100644
--- a/sql/sql_admin.cc
+++ b/sql/sql_admin.cc
@@ -29,6 +29,7 @@
#include "sql_parse.h" // check_table_access
#include "strfunc.h"
#include "sql_admin.h"
+#include "sql_statistics.h"
/* Prepare, run and cleanup for mysql_recreate_table() */
@@ -718,7 +719,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
if (compl_result_code == HA_ADMIN_OK &&
operator_func == &handler::ha_analyze &&
table->table->s->table_category == TABLE_CATEGORY_USER &&
- (thd->variables.use_stat_tables > 0 ||
+ (get_use_stat_tables_mode(thd) > NEVER ||
lex->with_persistent_for_clause))
{
if (!(compl_result_code=
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 9db8ac1c732..a582dde4277 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -49,6 +49,7 @@
#include "sql_trigger.h"
#include "transaction.h"
#include "sql_prepare.h"
+#include "sql_statistics.h"
#include <m_ctype.h>
#include <my_dir.h>
#include <hash.h>
@@ -3142,7 +3143,7 @@ retry_share:
while (table_cache_count > table_cache_size && unused_tables)
free_cache_entry(unused_tables);
- if (thd->variables.use_stat_tables > 0)
+ if (get_use_stat_tables_mode(thd) > NEVER)
{
if (share->table_category != TABLE_CATEGORY_SYSTEM)
{
@@ -4634,7 +4635,7 @@ open_and_process_table(THD *thd, LEX *lex, TABLE_LIST *tables,
goto end;
}
- if (thd->variables.use_stat_tables > 0 && tables->table)
+ if (get_use_stat_tables_mode(thd) > NEVER && tables->table)
{
TABLE_SHARE *table_share= tables->table->s;
if (table_share && table_share->table_category != TABLE_CATEGORY_SYSTEM)
diff --git a/sql/sql_base.h b/sql/sql_base.h
index c4cd7f467a0..aa2ba9e5680 100644
--- a/sql/sql_base.h
+++ b/sql/sql_base.h
@@ -310,22 +310,6 @@ int dynamic_column_error_message(enum_dyncol_func_result rc);
/* open_and_lock_tables with optional derived handling */
int open_and_lock_tables_derived(THD *thd, TABLE_LIST *tables, bool derived);
-int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables);
-int collect_statistics_for_table(THD *thd, TABLE *table);
-int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *share,
- bool is_safe);
-int alloc_statistics_for_table(THD *thd, TABLE *table);
-int update_statistics_for_table(THD *thd, TABLE *table);
-int delete_statistics_for_table(THD *thd, LEX_STRING *db, LEX_STRING *tab);
-int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col);
-int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info,
- bool ext_prefixes_only);
-int rename_table_in_stat_tables(THD *thd, LEX_STRING *db, LEX_STRING *tab,
- LEX_STRING *new_db, LEX_STRING *new_tab);
-int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col,
- const char *new_name);
-void set_statistics_for_table(THD *thd, TABLE *table);
-
extern "C" int simple_raw_key_cmp(void* arg, const void* key1,
const void* key2);
extern "C" int count_distinct_walk(void *elem, element_count count, void *arg);
diff --git a/sql/sql_db.cc b/sql/sql_db.cc
index 086445948bd..f4e9ccfc5e6 100644
--- a/sql/sql_db.cc
+++ b/sql/sql_db.cc
@@ -36,6 +36,7 @@
#include "sp.h"
#include "events.h"
#include "sql_handler.h"
+#include "sql_statistics.h"
#include <my_dir.h>
#include <m_ctype.h>
#include "log.h"
diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc
index 2dd090578a7..4e35072d508 100644
--- a/sql/sql_delete.cc
+++ b/sql/sql_delete.cc
@@ -35,6 +35,7 @@
#include "sql_select.h"
#include "sp_head.h"
#include "sql_trigger.h"
+#include "sql_statistics.h"
#include "transaction.h"
#include "records.h" // init_read_record,
#include "sql_derived.h" // mysql_handle_list_of_derived
diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc
index 225bb413195..ac96746c389 100644
--- a/sql/sql_join_cache.cc
+++ b/sql/sql_join_cache.cc
@@ -3812,7 +3812,8 @@ uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(ulong recno)
uint incr= 0;
TABLE_REF *ref= &join_tab->ref;
TABLE *tab= join_tab->table;
- uint rec_per_key= tab->key_info[ref->key].real_rec_per_key(ref->key_parts-1);
+ uint rec_per_key=
+ tab->key_info[ref->key].actual_rec_per_key(ref->key_parts-1);
set_if_bigger(rec_per_key, 1);
if (recno == 1)
incr= ref->key_length + tab->file->ref_length;
diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc
index d27989c3b01..c91623cee6e 100644
--- a/sql/sql_rename.cc
+++ b/sql/sql_rename.cc
@@ -28,6 +28,7 @@
#include "lock.h" // MYSQL_OPEN_SKIP_TEMPORARY
#include "sql_base.h" // tdc_remove_table, lock_table_names,
#include "sql_handler.h" // mysql_ha_rm_tables
+#include "sql_statistics.h"
#include "datadict.h"
static TABLE_LIST *rename_tables(THD *thd, TABLE_LIST *table_list,
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index fcb650b4757..85908623324 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -5416,7 +5416,7 @@ best_access_path(JOIN *join,
else
{
uint key_parts= table->actual_n_key_parts(keyinfo);
- if (!(records= keyinfo->real_rec_per_key(key_parts-1)))
+ if (!(records= keyinfo->actual_rec_per_key(key_parts-1)))
{ /* Prefer longer keys */
records=
((double) s->records / (double) rec *
@@ -5516,7 +5516,7 @@ best_access_path(JOIN *join,
else
{
/* Check if we have statistic about the distribution */
- if ((records= keyinfo->real_rec_per_key(max_key_part-1)))
+ if ((records= keyinfo->actual_rec_per_key(max_key_part-1)))
{
/*
Fix for the case where the index statistics is too
@@ -22974,7 +22974,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
if (used_key_parts > used_index_parts)
used_pk_parts= used_key_parts-used_index_parts;
rec_per_key= used_key_parts ?
- keyinfo->real_rec_per_key(used_key_parts-1) : 1;
+ keyinfo->actual_rec_per_key(used_key_parts-1) : 1;
/* Take into account the selectivity of the used pk prefix */
if (used_pk_parts)
{
@@ -22989,8 +22989,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
rec_per_key= 1;
if (rec_per_key > 1)
{
- rec_per_key*= pkinfo->real_rec_per_key(used_pk_parts-1);
- rec_per_key/= pkinfo->real_rec_per_key(0);
+ rec_per_key*= pkinfo->actual_rec_per_key(used_pk_parts-1);
+ rec_per_key/= pkinfo->actual_rec_per_key(0);
/*
The value of rec_per_key for the extended key has
to be adjusted accordingly if some components of
@@ -23004,9 +23004,9 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
We presume here that for any index rec_per_key[i] != 0
if rec_per_key[0] != 0.
*/
- DBUG_ASSERT(pkinfo->real_rec_per_key(i));
- rec_per_key*= pkinfo->real_rec_per_key(i-1);
- rec_per_key/= pkinfo->real_rec_per_key(i);
+ DBUG_ASSERT(pkinfo->actual_rec_per_key(i));
+ rec_per_key*= pkinfo->actual_rec_per_key(i-1);
+ rec_per_key/= pkinfo->actual_rec_per_key(i);
}
}
}
@@ -23051,7 +23051,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
select_limit= (ha_rows) (select_limit *
(double) table_records /
table->quick_condition_rows);
- rec_per_key= keyinfo->real_rec_per_key(keyinfo->key_parts-1);
+ rec_per_key= keyinfo->actual_rec_per_key(keyinfo->key_parts-1);
set_if_bigger(rec_per_key, 1);
/*
Here we take into account the fact that rows are
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 19a25c28942..861509b30de 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -45,6 +45,7 @@
#include "set_var.h"
#include "sql_trigger.h"
#include "sql_derived.h"
+#include "sql_statistics.h"
#include "sql_connect.h"
#include "authors.h"
#include "contributors.h"
@@ -5765,7 +5766,7 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables,
if (key->rec_per_key[j])
{
ha_rows records=((double) show_table->stat_records() /
- key->real_rec_per_key(j));
+ key->actual_rec_per_key(j));
table->field[9]->store((longlong) records, TRUE);
table->field[9]->set_notnull();
}
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 93d3a1c581f..1cb4398caa2 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -206,6 +206,8 @@ private:
Count_distinct_field *count_distinct; /* The container for distinct
column values */
+ bool is_single_pk_col; /* TRUE <-> the only column of the primary key */
+
public:
inline void init(THD *thd, Field * table_field);
@@ -1399,6 +1401,8 @@ private:
public:
+ bool is_single_comp_pk;
+
Index_prefix_calc(TABLE *table, KEY *key_info)
: index_table(table), index_info(key_info)
{
@@ -1407,6 +1411,16 @@ public:
uint key_parts= table->actual_n_key_parts(key_info);
empty= TRUE;
prefixes= 0;
+
+ is_single_comp_pk= FALSE;
+ uint pk= table->s->primary_key;
+ if (table->key_info - key_info == pk && table->key_info[pk].key_parts == 1)
+ {
+ prefixes= 1;
+ is_single_comp_pk= TRUE;
+ return;
+ }
+
if ((calc_state=
(Prefix_calc_state *) sql_alloc(sizeof(Prefix_calc_state)*key_parts)))
{
@@ -1430,6 +1444,7 @@ public:
}
}
+
/**
@breif
Change the elements of calc_state after reading the next index entry
@@ -1487,6 +1502,13 @@ public:
{
uint i;
Prefix_calc_state *state;
+
+ if (is_single_comp_pk)
+ {
+ index_info->collected_stats->set_avg_frequency(0, 1.0);
+ return;
+ }
+
for (i= 0, state= calc_state; i < prefixes; i++, state++)
{
if (i < prefixes)
@@ -1658,7 +1680,7 @@ void create_min_max_stistical_fields_for_table_share(THD *thd,
int alloc_statistics_for_table(THD* thd, TABLE *table)
{
Field **field_ptr;
- uint cnt= 0;
+ uint fields;
DBUG_ENTER("alloc_statistics_for_table");
@@ -1666,10 +1688,11 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
(Table_statistics *) alloc_root(&table->mem_root,
sizeof(Table_statistics));
- for (field_ptr= table->field; *field_ptr; field_ptr++, cnt++) ;
+ fields= table->s->fields ;
Column_statistics_collected *column_stats=
(Column_statistics_collected *) alloc_root(&table->mem_root,
- sizeof(Column_statistics_collected) * cnt);
+ sizeof(Column_statistics_collected) *
+ fields);
uint keys= table->s->keys;
Index_statistics *index_stats=
@@ -1688,7 +1711,7 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
table_stats->index_stats= index_stats;
table_stats->idx_avg_frequency= idx_avg_frequency;
- memset(column_stats, 0, sizeof(Column_statistics) * cnt);
+ memset(column_stats, 0, sizeof(Column_statistics) * fields);
for (field_ptr= table->field; *field_ptr; field_ptr++, column_stats++)
(*field_ptr)->collected_stats= column_stats;
@@ -1838,13 +1861,23 @@ inline
void Column_statistics_collected::init(THD *thd, Field *table_field)
{
uint max_heap_table_size= thd->variables.max_heap_table_size;
+ TABLE *table= table_field->table;
+ uint pk= table->s->primary_key;
+
+ is_single_pk_col= FALSE;
+ if (pk != MAX_KEY && table->key_info[pk].key_parts == 1 &&
+ table->key_info[pk].key_part[0].fieldnr == table_field->field_index + 1)
+ is_single_pk_col= TRUE;
+
column= table_field;
set_all_nulls();
nulls= 0;
column_total_length= 0;
+ if (is_single_pk_col)
+ count_distinct= NULL;
if (table_field->flags & BLOB_FLAG)
count_distinct= NULL;
else
@@ -1923,6 +1956,12 @@ void Column_statistics_collected::finish(ha_rows rows)
delete count_distinct;
count_distinct= NULL;
}
+ else if (is_single_pk_col)
+ {
+ val= 1.0;
+ set_avg_frequency(val);
+ set_not_null(COLUMN_STAT_AVG_FREQUENCY);
+ }
}
@@ -1986,6 +2025,12 @@ int collect_statistics_for_index(THD *thd, TABLE *table, uint index)
DEBUG_SYNC(table->in_use, "statistics_collection_start1");
DEBUG_SYNC(table->in_use, "statistics_collection_start2");
+ if (index_prefix_calc.is_single_comp_pk)
+ {
+ index_prefix_calc.get_avg_frequency();
+ DBUG_RETURN(rc);
+ }
+
table->key_read= 1;
table->file->extra(HA_EXTRA_KEYREAD);
@@ -2078,7 +2123,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
table->collected_stats->cardinality_is_null= TRUE;
table->collected_stats->cardinality= 0;
-
+
for (field_ptr= table->field; *field_ptr; field_ptr++)
{
table_field= *field_ptr;
@@ -2949,9 +2994,9 @@ int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col,
void set_statistics_for_table(THD *thd, TABLE *table)
{
- uint use_stat_table_mode= thd->variables.use_stat_tables;
+ Use_stat_tables_mode use_stat_table_mode= get_use_stat_tables_mode(thd);
table->used_stat_records=
- (use_stat_table_mode <= 1 ||
+ (use_stat_table_mode <= COMPLEMENTARY ||
!table->s->stats_is_read || !table->s->read_stats ||
table->s->read_stats->cardinality_is_null) ?
table->file->stats.records : table->s->read_stats->cardinality;
@@ -2960,7 +3005,8 @@ void set_statistics_for_table(THD *thd, TABLE *table)
key_info < key_info_end; key_info++)
{
key_info->is_statistics_from_stat_tables=
- (use_stat_table_mode > 1 && table->s->stats_is_read &&
+ (use_stat_table_mode > COMPLEMENTARY &&
+ table->s->stats_is_read &&
key_info->read_stats &&
key_info->read_stats->avg_frequency_is_inited() &&
key_info->read_stats->get_avg_frequency(0) > 0.5);
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 14a16170c3b..e24e4beae74 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -25,6 +25,14 @@
generated automatically by the table definitions.
*/
+typedef
+enum enum_use_stat_tables_mode /* Modes of the use_stat_tables system variable; get_use_stat_tables_mode() casts the stored value to this type */
+{
+ NEVER, /* lowest mode: callers test "> NEVER" before collecting or reading persistent statistics */
+ COMPLEMENTARY, /* for modes "<= COMPLEMENTARY" set_statistics_for_table() takes the record count from table->file->stats.records and does not mark index statistics as coming from the stat tables */
+ PEFERABLY, /* (sic) the only mode "> COMPLEMENTARY": cardinality and index statistics read from the stat tables may be used; NOTE(review): presumably meant PREFERABLY -- a rename must update every user */
+} Use_stat_tables_mode;
+
enum enum_stat_tables
{
TABLE_STAT,
@@ -60,6 +68,27 @@ enum enum_index_stat_col
INDEX_STAT_AVG_FREQUENCY
};
+inline
+Use_stat_tables_mode get_use_stat_tables_mode(THD *thd) /* Returns thd->variables.use_stat_tables as a Use_stat_tables_mode */
+{
+ return (Use_stat_tables_mode) (thd->variables.use_stat_tables); /* plain cast, no range check here; assumes the stored value is one of the enumerators -- TODO confirm where the variable is set */
+}
+
+int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables);
+int collect_statistics_for_table(THD *thd, TABLE *table);
+int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *share,
+ bool is_safe);
+int alloc_statistics_for_table(THD *thd, TABLE *table);
+int update_statistics_for_table(THD *thd, TABLE *table);
+int delete_statistics_for_table(THD *thd, LEX_STRING *db, LEX_STRING *tab);
+int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col);
+int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info,
+ bool ext_prefixes_only);
+int rename_table_in_stat_tables(THD *thd, LEX_STRING *db, LEX_STRING *tab,
+ LEX_STRING *new_db, LEX_STRING *new_tab);
+int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col,
+ const char *new_name);
+void set_statistics_for_table(THD *thd, TABLE *table);
class Columns_statistics;
class Index_statistics;
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index b63c1859582..5bd2b105a92 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -43,6 +43,7 @@
#include "discover.h" // readfrm
#include "my_pthread.h" // pthread_mutex_t
#include "log_event.h" // Query_log_event
+#include "sql_statistics.h"
#include <hash.h>
#include <myisam.h>
#include <my_dir.h>
diff --git a/sql/structs.h b/sql/structs.h
index 13bb0574b24..a3a54c524e6 100644
--- a/sql/structs.h
+++ b/sql/structs.h
@@ -142,7 +142,7 @@ typedef struct st_key {
engine_option_value *option_list;
ha_index_option_struct *option_struct; /* structure with parsed options */
- double real_rec_per_key(uint i);
+ double actual_rec_per_key(uint i);
} KEY;
diff --git a/sql/table.cc b/sql/table.cc
index 11a8c553566..3ce0074a86f 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -6783,7 +6783,7 @@ uint TABLE_SHARE::actual_n_key_parts(THD *thd)
}
-double KEY::real_rec_per_key(uint i)
+double KEY::actual_rec_per_key(uint i)
{
if (rec_per_key == 0)
return 0;