author    | Igor Babaev <igor@askmonty.org> | 2013-01-10 22:33:23 -0800
committer | Igor Babaev <igor@askmonty.org> | 2013-01-10 22:33:23 -0800
commit    | 1ef07d0845300ad200671f9809dcd9ee8b2ff837 (patch)
tree      | 5c22187a94c73c7f8099a5d366ab3fd425f8e578 /sql
parent    | 421bbdcf1004806ccbee7b45d55e90ddc15c76ae (diff)
parent    | f853333e09b222f6d6d76d9343b6202b4769ffab (diff)
download  | mariadb-git-1ef07d0845300ad200671f9809dcd9ee8b2ff837.tar.gz
Merge 10.0-base -> 10.0.
Also fixed a bug in sql_update.cc: the code of mysql_update() lacked
a call to set_statistics_for_table().
Diffstat (limited to 'sql')
37 files changed, 4189 insertions, 168 deletions
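
The sql_update.cc fix mentioned in the commit message is not among the hunks reproduced below (the listing is truncated). Assuming it mirrors the parallel change made to mysql_delete() in sql_delete.cc further down, the missing call would be added roughly like this — a sketch of the intent, not the committed hunk:

```cpp
// sql_update.cc, inside mysql_update() -- sketch only, based on the
// commit message and the analogous sql_delete.cc hunk in this diff;
// the exact surrounding context in the real function may differ.
  /* Update the table->file->stats.records number */
  table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
  set_statistics_for_table(thd, table);  // the call mysql_update() lacked
```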
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 0174ff822f3..98f779a8b60 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -64,7 +64,8 @@ SET (SQL_SOURCE sql_parse.cc sql_bootstrap.cc sql_bootstrap.h sql_partition.cc sql_plugin.cc sql_prepare.cc sql_rename.cc debug_sync.cc debug_sync.h - sql_repl.cc sql_select.cc sql_show.cc sql_state.c sql_string.cc + sql_repl.cc sql_select.cc sql_show.cc sql_state.c + sql_statistics.cc sql_string.cc sql_table.cc sql_test.cc sql_trigger.cc sql_udf.cc sql_union.cc sql_update.cc sql_view.cc strfunc.cc table.cc thr_malloc.cc sql_time.cc tztime.cc uniques.cc unireg.cc item_xmlfunc.cc diff --git a/sql/field.cc b/sql/field.cc index a18c72119be..f3e3ee938d6 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -71,7 +71,7 @@ const char field_separator=','; ((ulong) ((LL(1) << min(arg, 4) * 8) - LL(1))) #define ASSERT_COLUMN_MARKED_FOR_READ DBUG_ASSERT(!table || (!table->read_set || bitmap_is_set(table->read_set, field_index))) -#define ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED DBUG_ASSERT(!table || (!table->write_set || bitmap_is_set(table->write_set, field_index) || bitmap_is_set(table->vcol_set, field_index))) +#define ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED DBUG_ASSERT(is_stat_field || !table || (!table->write_set || bitmap_is_set(table->write_set, field_index) || bitmap_is_set(table->vcol_set, field_index))) #define FLAGSTR(S,F) ((S) & (F) ? #F " " : "") @@ -1175,11 +1175,11 @@ int Field_num::check_int(CHARSET_INFO *cs, const char *str, int length, if (str == int_end || error == MY_ERRNO_EDOM) { ErrConvString err(str, length, cs); - push_warning_printf(table->in_use, MYSQL_ERROR::WARN_LEVEL_WARN, + push_warning_printf(get_thd(), MYSQL_ERROR::WARN_LEVEL_WARN, ER_TRUNCATED_WRONG_VALUE_FOR_FIELD, ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD), "integer", err.ptr(), field_name, - (ulong) table->in_use->warning_info->current_row_for_warning()); + (ulong) get_thd()->warning_info->current_row_for_warning()); return 1; } /* Test if we have garbage at the end of the given string. */ @@ -1248,7 +1248,7 @@ bool Field_num::get_int(CHARSET_INFO *cs, const char *from, uint len, goto out_of_range; } } - if (table->in_use->count_cuted_fields && + if (get_thd()->count_cuted_fields && check_int(cs, from, len, end, error)) return 1; return 0; @@ -1319,13 +1319,16 @@ Field::Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg, option_struct(0), key_start(0), part_of_key(0), part_of_key_not_clustered(0), part_of_sortkey(0), unireg_check(unireg_check_arg), field_length(length_arg), - null_bit(null_bit_arg), is_created_from_null_item(FALSE), vcol_info(0), + null_bit(null_bit_arg), is_created_from_null_item(FALSE), + read_stats(NULL), collected_stats(0), + vcol_info(0), stored_in_db(TRUE) { flags=null_ptr ? 
0: NOT_NULL_FLAG; comment.str= (char*) ""; comment.length=0; - field_index= 0; + field_index= 0; + is_stat_field= FALSE; } @@ -1425,10 +1428,11 @@ int Field::store(const char *to, uint length, CHARSET_INFO *cs, enum_check_fields check_level) { int res; - enum_check_fields old_check_level= table->in_use->count_cuted_fields; - table->in_use->count_cuted_fields= check_level; + THD *thd= get_thd(); + enum_check_fields old_check_level= thd->count_cuted_fields; + thd->count_cuted_fields= check_level; res= store(to, length, cs); - table->in_use->count_cuted_fields= old_check_level; + thd->count_cuted_fields= old_check_level; return res; } @@ -1854,6 +1858,32 @@ Field *Field::clone(MEM_ROOT *root, TABLE *new_table) } + +Field *Field::clone(MEM_ROOT *root, TABLE *new_table, my_ptrdiff_t diff, + bool stat_flag) +{ + Field *tmp; + if ((tmp= (Field*) memdup_root(root,(char*) this,size_of()))) + { + tmp->init(new_table); + tmp->move_field_offset(diff); + } + tmp->is_stat_field= stat_flag; + return tmp; +} + + +Field *Field::clone(MEM_ROOT *root, my_ptrdiff_t diff) +{ + Field *tmp; + if ((tmp= (Field*) memdup_root(root,(char*) this,size_of()))) + { + tmp->move_field_offset(diff); + } + return tmp; +} + + /**************************************************************************** Field_null, a field that always return NULL ****************************************************************************/ @@ -1968,7 +1998,7 @@ int Field_decimal::store(const char *from_arg, uint len, CHARSET_INFO *cs) uchar *left_wall,*right_wall; uchar tmp_char; /* - To remember if table->in_use->cuted_fields has already been incremented, + To remember if get_thd()->cuted_fields has already been incremented, to do that only once */ bool is_cuted_fields_incr=0; @@ -2059,7 +2089,7 @@ int Field_decimal::store(const char *from_arg, uint len, CHARSET_INFO *cs) it makes the code easer to read. 
*/ - if (table->in_use->count_cuted_fields) + if (get_thd()->count_cuted_fields) { // Skip end spaces for (;from != end && my_isspace(&my_charset_bin, *from); from++) ; @@ -2211,7 +2241,7 @@ int Field_decimal::store(const char *from_arg, uint len, CHARSET_INFO *cs) /* Write digits of the frac_% parts ; - Depending on table->in_use->count_cutted_fields, we may also want + Depending on get_thd()->count_cutted_fields, we may also want to know if some non-zero tail of these parts will be truncated (for example, 0.002->0.00 will generate a warning, while 0.000->0.00 will not) @@ -2229,7 +2259,7 @@ int Field_decimal::store(const char *from_arg, uint len, CHARSET_INFO *cs) { if (pos == right_wall) { - if (table->in_use->count_cuted_fields && !is_cuted_fields_incr) + if (get_thd()->count_cuted_fields && !is_cuted_fields_incr) break; // Go on below to see if we lose non zero digits return 0; } @@ -2650,20 +2680,21 @@ int Field_new_decimal::store(const char *from, uint length, ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; int err; my_decimal decimal_value; + THD *thd= get_thd(); DBUG_ENTER("Field_new_decimal::store(char*)"); if ((err= str2my_decimal(E_DEC_FATAL_ERROR & ~(E_DEC_OVERFLOW | E_DEC_BAD_NUM), from, length, charset_arg, &decimal_value)) && - table->in_use->abort_on_warning) + thd->abort_on_warning) { ErrConvString errmsg(from, length, &my_charset_bin); - push_warning_printf(table->in_use, MYSQL_ERROR::WARN_LEVEL_WARN, + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_TRUNCATED_WRONG_VALUE_FOR_FIELD, ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD), "decimal", errmsg.ptr(), field_name, - (ulong) table->in_use->warning_info->current_row_for_warning()); + (ulong) thd->warning_info->current_row_for_warning()); DBUG_RETURN(err); } @@ -2679,11 +2710,11 @@ int Field_new_decimal::store(const char *from, uint length, case E_DEC_BAD_NUM: { ErrConvString errmsg(from, length, &my_charset_bin); - push_warning_printf(table->in_use, MYSQL_ERROR::WARN_LEVEL_WARN, + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_TRUNCATED_WRONG_VALUE_FOR_FIELD, ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD), "decimal", errmsg.ptr(), field_name, - (ulong) table->in_use->warning_info-> + (ulong) thd->warning_info-> current_row_for_warning()); my_decimal_set_zero(&decimal_value); break; @@ -2711,6 +2742,7 @@ int Field_new_decimal::store(double nr) ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; my_decimal decimal_value; int err; + THD *thd= get_thd(); DBUG_ENTER("Field_new_decimal::store(double)"); err= double2my_decimal(E_DEC_FATAL_ERROR & ~E_DEC_OVERFLOW, nr, @@ -2720,11 +2752,11 @@ int Field_new_decimal::store(double nr) if (check_overflow(err)) set_value_on_overflow(&decimal_value, decimal_value.sign()); /* Only issue a warning if store_value doesn't issue an warning */ - table->in_use->got_warning= 0; + thd->got_warning= 0; } if (store_value(&decimal_value)) err= 1; - else if (err && !table->in_use->got_warning) + else if (err && !thd->got_warning) err= warn_if_overflow(err); DBUG_RETURN(err); } @@ -2742,11 +2774,11 @@ int Field_new_decimal::store(longlong nr, bool unsigned_val) if (check_overflow(err)) set_value_on_overflow(&decimal_value, decimal_value.sign()); /* Only issue a warning if store_value doesn't issue an warning */ - table->in_use->got_warning= 0; + get_thd()->got_warning= 0; } if (store_value(&decimal_value)) err= 1; - else if (err && !table->in_use->got_warning) + else if (err && !get_thd()->got_warning) err= warn_if_overflow(err); return err; } @@ -3642,7 +3674,7 @@ longlong Field_long::val_int(void) 
ASSERT_COLUMN_MARKED_FOR_READ; int32 j; /* See the comment in Field_long::store(long long) */ - DBUG_ASSERT(table->in_use == current_thd); + DBUG_ASSERT(!table || table->in_use == current_thd); j=sint4korr(ptr); return unsigned_flag ? (longlong) (uint32) j : (longlong) j; } @@ -3724,7 +3756,7 @@ int Field_longlong::store(const char *from,uint len,CHARSET_INFO *cs) set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1); error= 1; } - else if (table->in_use->count_cuted_fields && + else if (get_thd()->count_cuted_fields && check_int(cs, from, len, end, error)) error= 1; else @@ -3876,7 +3908,7 @@ int Field_float::store(const char *from,uint len,CHARSET_INFO *cs) char *end; double nr= my_strntod(cs,(char*) from,len,&end,&error); if (error || (!len || ((uint) (end-from) != len && - table->in_use->count_cuted_fields))) + get_thd()->count_cuted_fields))) { set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, (error ? ER_WARN_DATA_OUT_OF_RANGE : WARN_DATA_TRUNCATED), 1); @@ -4064,7 +4096,7 @@ int Field_double::store(const char *from,uint len,CHARSET_INFO *cs) char *end; double nr= my_strntod(cs,(char*) from, len, &end, &error); if (error || (!len || ((uint) (end-from) != len && - table->in_use->count_cuted_fields))) + get_thd()->count_cuted_fields))) { set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, (error ? ER_WARN_DATA_OUT_OF_RANGE : WARN_DATA_TRUNCATED), 1); @@ -4474,10 +4506,11 @@ int Field_timestamp::store_TIME_with_warning(THD *thd, MYSQL_TIME *l_time, int Field_timestamp::store_time_dec(MYSQL_TIME *ltime, uint dec) { - THD *thd= table->in_use; int unused; MYSQL_TIME l_time= *ltime; ErrConvTime str(ltime); + THD *thd= get_thd(); + bool valid= !check_date(&l_time, pack_time(&l_time) != 0, (thd->variables.sql_mode & MODE_NO_ZERO_DATE) | MODE_NO_ZERO_IN_DATE, &unused); @@ -4492,7 +4525,7 @@ int Field_timestamp::store(const char *from,uint len,CHARSET_INFO *cs) int error; int have_smth_to_conv; ErrConvString str(from, len, cs); - THD *thd= table->in_use; + THD *thd= get_thd(); /* We don't want to store invalid or fuzzy datetime values in TIMESTAMP */ have_smth_to_conv= (str_to_datetime(cs, from, len, &l_time, @@ -4509,7 +4542,7 @@ int Field_timestamp::store(double nr) MYSQL_TIME l_time; int error; ErrConvDouble str(nr); - THD *thd= table->in_use; + THD *thd= get_thd(); longlong tmp= double_to_datetime(nr, &l_time, (thd->variables.sql_mode & MODE_NO_ZERO_DATE) | @@ -4523,7 +4556,7 @@ int Field_timestamp::store(longlong nr, bool unsigned_val) MYSQL_TIME l_time; int error; ErrConvInteger str(nr); - THD *thd= table->in_use; + THD *thd= get_thd(); /* We don't want to store invalid or fuzzy datetime values in TIMESTAMP */ longlong tmp= number_to_datetime(nr, 0, &l_time, (thd->variables.sql_mode & @@ -4615,7 +4648,7 @@ String *Field_timestamp::val_str(String *val_buffer, String *val_ptr) bool Field_timestamp::get_date(MYSQL_TIME *ltime, ulonglong fuzzydate) { - THD *thd= table->in_use; + THD *thd= get_thd(); thd->time_zone_used= 1; ulong sec_part; my_time_t temp= get_timestamp(&sec_part); @@ -4668,7 +4701,7 @@ void Field_timestamp::sql_type(String &res) const int Field_timestamp::set_time() { - THD *thd= table->in_use; + THD *thd= get_thd(); set_notnull(); store_TIME(thd->query_start(), 0); return 0; @@ -4844,7 +4877,7 @@ int Field_timestamp_hires::store_decimal(const my_decimal *d) int error; MYSQL_TIME ltime; longlong tmp; - THD *thd= table->in_use; + THD *thd= get_thd(); ErrConvDecimal str(d); if (my_decimal2seconds(d, &nr, &sec_part)) @@ -4862,7 +4895,7 @@ int 
Field_timestamp_hires::store_decimal(const my_decimal *d) int Field_timestamp_hires::set_time() { - THD *thd= table->in_use; + THD *thd= get_thd(); set_notnull(); store_TIME(thd->query_start(), thd->query_start_sec_part()); return 0; @@ -4981,7 +5014,7 @@ int Field_temporal::store(const char *from,uint len,CHARSET_INFO *cs) MYSQL_TIME ltime; int error; enum enum_mysql_timestamp_type func_res; - THD *thd= table->in_use; + THD *thd= get_thd(); ErrConvString str(from, len, cs); func_res= str_to_datetime(cs, from, len, <ime, @@ -4998,7 +5031,7 @@ int Field_temporal::store(double nr) { int error= 0; MYSQL_TIME ltime; - THD *thd= table->in_use; + THD *thd= get_thd(); ErrConvDouble str(nr); longlong tmp= double_to_datetime(nr, <ime, @@ -5016,7 +5049,7 @@ int Field_temporal::store(longlong nr, bool unsigned_val) int error; MYSQL_TIME ltime; longlong tmp; - THD *thd= table->in_use; + THD *thd= get_thd(); ErrConvInteger str(nr); tmp= number_to_datetime(nr, 0, <ime, (TIME_FUZZY_DATE | @@ -5080,7 +5113,7 @@ int Field_time::store(const char *from,uint len,CHARSET_INFO *cs) int was_cut; int have_smth_to_conv= str_to_time(cs, from, len, <ime, - table->in_use->variables.sql_mode & + get_thd()->variables.sql_mode & (MODE_NO_ZERO_DATE | MODE_NO_ZERO_IN_DATE | MODE_INVALID_DATES), &was_cut) > MYSQL_TIMESTAMP_ERROR; @@ -5186,7 +5219,7 @@ String *Field_time::val_str(String *val_buffer, bool Field_time::get_date(MYSQL_TIME *ltime, ulonglong fuzzydate) { - THD *thd= table->in_use; + THD *thd= get_thd(); if (!(fuzzydate & (TIME_FUZZY_DATE|TIME_TIME_ONLY))) { push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, @@ -5376,7 +5409,7 @@ int Field_year::store(const char *from, uint len,CHARSET_INFO *cs) set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1); return 1; } - if (table->in_use->count_cuted_fields && + if (get_thd()->count_cuted_fields && (error= check_int(cs, from, len, end, error))) { if (error == 1) /* empty or incorrect string */ @@ -5850,7 +5883,7 @@ int Field_datetime_hires::store_decimal(const my_decimal *d) int error; MYSQL_TIME ltime; longlong tmp; - THD *thd= table->in_use; + THD *thd= get_thd(); ErrConvDecimal str(d); if (my_decimal2seconds(d, &nr, &sec_part)) @@ -5987,7 +6020,9 @@ check_string_copy_error(Field_str *field, { const char *pos; char tmp[32]; - THD *thd= field->table->in_use; + THD *thd; + + thd= field->get_thd(); if (!(pos= well_formed_error_pos) && !(pos= cannot_convert_error_pos)) @@ -6029,11 +6064,12 @@ int Field_longstr::report_if_important_data(const char *pstr, const char *end, bool count_spaces) { - if ((pstr < end) && table->in_use->count_cuted_fields) + THD *thd= get_thd(); + if ((pstr < end) && thd->count_cuted_fields) { if (test_if_important_data(field_charset, pstr, end)) { - if (table->in_use->abort_on_warning) + if (thd->abort_on_warning) set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_DATA_TOO_LONG, 1); else set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1); @@ -6060,7 +6096,7 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs) const char *from_end_pos; /* See the comment for Field_long::store(long long) */ - DBUG_ASSERT(table->in_use == current_thd); + DBUG_ASSERT(!table || table->in_use == current_thd); copy_length= well_formed_copy_nchars(field_charset, (char*) ptr, field_length, @@ -6106,7 +6142,7 @@ int Field_str::store(double nr) if (error) { - if (table->in_use->abort_on_warning) + if (get_thd()->abort_on_warning) set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_DATA_TOO_LONG, 1); else 
set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1); @@ -6166,7 +6202,7 @@ double Field_string::val_real(void) double result; result= my_strntod(cs,(char*) ptr,field_length,&end,&error); - if (!table->in_use->no_errors && + if (!get_thd()->no_errors && (error || (field_length != (uint32)(end - (char*) ptr) && !check_if_only_end_space(cs, end, (char*) ptr + field_length)))) @@ -6190,7 +6226,7 @@ longlong Field_string::val_int(void) longlong result; result= my_strntoll(cs, (char*) ptr,field_length,10,&end,&error); - if (!table->in_use->no_errors && + if (!get_thd()->no_errors && (error || (field_length != (uint32)(end - (char*) ptr) && !check_if_only_end_space(cs, end, (char*) ptr + field_length)))) @@ -6210,9 +6246,9 @@ String *Field_string::val_str(String *val_buffer __attribute__((unused)), { ASSERT_COLUMN_MARKED_FOR_READ; /* See the comment for Field_long::store(long long) */ - DBUG_ASSERT(table->in_use == current_thd); + DBUG_ASSERT(!table || table->in_use == current_thd); uint length; - if (table->in_use->variables.sql_mode & + if (get_thd()->variables.sql_mode & MODE_PAD_CHAR_TO_FULL_LENGTH) length= my_charpos(field_charset, ptr, ptr + field_length, field_length / field_charset->mbmaxlen); @@ -6229,7 +6265,7 @@ my_decimal *Field_string::val_decimal(my_decimal *decimal_value) ASSERT_COLUMN_MARKED_FOR_READ; int err= str2my_decimal(E_DEC_FATAL_ERROR, (char*) ptr, field_length, charset(), decimal_value); - if (!table->in_use->no_errors && err) + if (!get_thd()->no_errors && err) { ErrConvString errmsg((char*) ptr, field_length, charset()); push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, @@ -6613,7 +6649,7 @@ double Field_varstring::val_real(void) uint length= length_bytes == 1 ? (uint) *ptr : uint2korr(ptr); result= my_strntod(cs, (char*)ptr+length_bytes, length, &end, &error); - if (!table->in_use->no_errors && + if (!get_thd()->no_errors && (error || (length != (uint)(end - (char*)ptr+length_bytes) && !check_if_only_end_space(cs, end, (char*)ptr+length_bytes+length)))) { @@ -6636,7 +6672,7 @@ longlong Field_varstring::val_int(void) longlong result= my_strntoll(cs, (char*) ptr+length_bytes, length, 10, &end, &error); - if (!table->in_use->no_errors && + if (!get_thd()->no_errors && (error || (length != (uint)(end - (char*)ptr+length_bytes) && !check_if_only_end_space(cs, end, (char*)ptr+length_bytes+length)))) { @@ -6665,7 +6701,7 @@ my_decimal *Field_varstring::val_decimal(my_decimal *decimal_value) int error= str2my_decimal(E_DEC_FATAL_ERROR, (char*) ptr+length_bytes, length, cs, decimal_value); - if (!table->in_use->no_errors && error) + if (!get_thd()->no_errors && error) { push_numerical_conversion_warning(current_thd, (char*)ptr+length_bytes, length, cs, "DECIMAL", @@ -7646,7 +7682,7 @@ int Field_enum::store(const char *from,uint length,CHARSET_INFO *cs) tmp=0; set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1); } - if (!table->in_use->count_cuted_fields) + if (!get_thd()->count_cuted_fields) err= 0; } else @@ -7670,7 +7706,7 @@ int Field_enum::store(longlong nr, bool unsigned_val) if ((ulonglong) nr > typelib->count || nr == 0) { set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1); - if (nr != 0 || table->in_use->count_cuted_fields) + if (nr != 0 || get_thd()->count_cuted_fields) { nr= 0; error= 1; @@ -8200,7 +8236,7 @@ int Field_bit::store(const char *from, uint length, CHARSET_INFO *cs) { set_rec_bits((1 << bit_len) - 1, bit_ptr, bit_ofs, bit_len); memset(ptr, 0xff, bytes_in_rec); - if 
(table->in_use->really_abort_on_warning()) + if (get_thd()->really_abort_on_warning()) set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_DATA_TOO_LONG, 1); else set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1); @@ -8336,7 +8372,9 @@ int Field_bit::cmp_max(const uchar *a, const uchar *b, uint max_len) if ((flag= (int) (bits_a - bits_b))) return flag; } - return memcmp(a, b, field_length); + if (!bytes_in_rec) + return 0; + return memcmp(a, b, bytes_in_rec); } @@ -8635,7 +8673,7 @@ int Field_bit_as_char::store(const char *from, uint length, CHARSET_INFO *cs) memset(ptr, 0xff, bytes_in_rec); if (bits) *ptr&= ((1 << bits) - 1); /* set first uchar */ - if (table->in_use->really_abort_on_warning()) + if (get_thd()->really_abort_on_warning()) set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_DATA_TOO_LONG, 1); else set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_DATA_OUT_OF_RANGE, 1); @@ -9739,7 +9777,7 @@ void Field::set_datetime_warning(MYSQL_ERROR::enum_warning_level level, uint code, const ErrConv *str, timestamp_type ts_type, int cuted_increment) { - THD *thd= table->in_use; + THD *thd= get_thd(); if (thd->really_abort_on_warning() && level >= MYSQL_ERROR::WARN_LEVEL_WARN) make_truncated_value_warning(thd, level, str, ts_type, field_name); else diff --git a/sql/field.h b/sql/field.h index da78a7c7674..f450e596e03 100644 --- a/sql/field.h +++ b/sql/field.h @@ -36,6 +36,8 @@ class Protocol; class Create_field; class Relay_log_info; class Field; +class Column_statistics; +class Column_statistics_collected; enum enum_check_fields { @@ -66,6 +68,8 @@ enum Derivation /* The length of the header part for each virtual column in the .frm file */ #define FRM_VCOL_HEADER_SIZE(b) (3 + test(b)) +class Count_distinct_field; + struct ha_field_option_struct; struct st_cache_field; @@ -216,6 +220,19 @@ public: */ bool is_created_from_null_item; + bool is_stat_field; /* TRUE in Field objects created for column min/max values */ + + /* + This structure is used for statistical data on the column + that has been read from the statistical table column_stat + */ + Column_statistics *read_stats; + /* + This structure is used for statistical data on the column that + is collected by the function collect_statistics_for_table + */ + Column_statistics_collected *collected_stats; + /* This is additional data provided for any computed(virtual) field. In particular it includes a pointer to the item by which this field @@ -308,6 +325,26 @@ public: virtual uint32 data_length() { return pack_length(); } virtual uint32 sort_length() const { return pack_length(); } + /* + Get the number bytes occupied by the value in the field. + CHAR values are stripped of trailing spaces. + Flexible values are stripped of their length. + */ + virtual uint32 value_length() + { + uint len; + if (!zero_pack() && + (type() == MYSQL_TYPE_STRING && + (len= pack_length()) >= 4 && len < 256)) + { + uchar *str, *end; + for (str= ptr, end= str+len; end > str && end[-1] == ' '; end--) {} + len=(uint) (end-str); + return len; + } + return data_length(); + } + /** Get the maximum size of the data in packed format. @@ -389,6 +426,36 @@ public: { return cmp(a, b); } virtual int key_cmp(const uchar *str, uint length) { return cmp(ptr,str); } + /* + Update the value m of the 'min_val' field with the current value v + of this field if force_update is set to TRUE or if v < m. + Return TRUE if the value has been updated. 
+ */ + virtual bool update_min(Field *min_val, bool force_update) + { + bool update_fl= force_update || cmp(ptr, min_val->ptr) < 0; + if (update_fl) + { + min_val->set_notnull(); + memcpy(min_val->ptr, ptr, pack_length()); + } + return update_fl; + } + /* + Update the value m of the 'max_val' field with the current value v + of this field if force_update is set to TRUE or if v > m. + Return TRUE if the value has been updated. + */ + virtual bool update_max(Field *max_val, bool force_update) + { + bool update_fl= force_update || cmp(ptr, max_val->ptr) > 0; + if (update_fl) + { + max_val->set_notnull(); + memcpy(max_val->ptr, ptr, pack_length()); + } + return update_fl; + } virtual uint decimals() const { return 0; } /* Caller beware: sql_type can change str.Ptr, so check @@ -424,6 +491,8 @@ public: */ inline bool real_maybe_null(void) { return null_ptr != 0; } + inline THD *get_thd() { return table ? table->in_use : current_thd; } + enum { LAST_NULL_BYTE_UNDEF= 0 }; @@ -460,6 +529,9 @@ public: uchar *new_ptr, uchar *new_null_ptr, uint new_null_bit); Field *clone(MEM_ROOT *mem_root, TABLE *new_table); + Field *clone(MEM_ROOT *mem_root, TABLE *new_table, my_ptrdiff_t diff, + bool stat_flag= FALSE); + Field *clone(MEM_ROOT *mem_root, my_ptrdiff_t diff); inline void move_field(uchar *ptr_arg,uchar *null_ptr_arg,uchar null_bit_arg) { ptr=ptr_arg; null_ptr=null_ptr_arg; null_bit=null_bit_arg; @@ -1886,6 +1958,10 @@ public: int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0L); int key_cmp(const uchar *,const uchar*); int key_cmp(const uchar *str, uint length); + /* Never update the value of min_val for a blob field */ + bool update_min(Field *min_val, bool force_update) { return FALSE; } + /* Never update the value of max_val for a blob field */ + bool update_max(Field *max_val, bool force_update) { return FALSE; } uint32 key_length() const { return 0; } void sort_string(uchar *buff,uint length); uint32 pack_length() const @@ -1903,6 +1979,7 @@ public: { return (uint32) (packlength); } uint row_pack_length() { return pack_length_no_ptr(); } uint32 sort_length() const; + uint32 value_length() { return get_length(); } virtual uint32 max_data_length() const { return (uint32) (((ulonglong) 1 << (packlength*8)) -1); @@ -2174,6 +2251,28 @@ public: { return cmp_binary((uchar *) a, (uchar *) b); } int key_cmp(const uchar *str, uint length); int cmp_offset(uint row_offset); + bool update_min(Field *min_val, bool force_update) + { + longlong val= val_int(); + bool update_fl= force_update || val < min_val->val_int(); + if (update_fl) + { + min_val->set_notnull(); + min_val->store(val, FALSE); + } + return update_fl; + } + bool update_max(Field *max_val, bool force_update) + { + longlong val= val_int(); + bool update_fl= force_update || val > max_val->val_int(); + if (update_fl) + { + max_val->set_notnull(); + max_val->store(val, FALSE); + } + return update_fl; + } void get_image(uchar *buff, uint length, CHARSET_INFO *cs) { get_key_image(buff, length, itRAW); } void set_image(const uchar *buff,uint length, CHARSET_INFO *cs) diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index a24d144c97b..6ac51b6e380 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -3838,6 +3838,7 @@ int ha_partition::truncate_partition(Alter_info *alter_info, bool *binlog_stmt) SYNOPSIS start_bulk_insert() rows Number of rows to insert + flags Flags to control index creation RETURN VALUE NONE @@ -3845,7 +3846,7 @@ int ha_partition::truncate_partition(Alter_info *alter_info, bool *binlog_stmt) DESCRIPTION 
rows == 0 means we will probably insert many rows */ -void ha_partition::start_bulk_insert(ha_rows rows) +void ha_partition::start_bulk_insert(ha_rows rows, uint flags) { DBUG_ENTER("ha_partition::start_bulk_insert"); diff --git a/sql/ha_partition.h b/sql/ha_partition.h index 7818e1e608f..b37b0f0b1c3 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -374,7 +374,7 @@ public: virtual int delete_row(const uchar * buf); virtual int delete_all_rows(void); virtual int truncate(); - virtual void start_bulk_insert(ha_rows rows); + virtual void start_bulk_insert(ha_rows rows, uint flags); virtual int end_bulk_insert(); private: ha_rows guess_bulk_insert_rows(); diff --git a/sql/handler.h b/sql/handler.h index 997088b1192..559648da37c 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -2008,11 +2008,11 @@ public: /** to be actually called to get 'check()' functionality*/ int ha_check(THD *thd, HA_CHECK_OPT *check_opt); int ha_repair(THD* thd, HA_CHECK_OPT* check_opt); - void ha_start_bulk_insert(ha_rows rows) + void ha_start_bulk_insert(ha_rows rows, uint flags= 0) { DBUG_ENTER("handler::ha_start_bulk_insert"); estimation_rows_to_insert= rows; - start_bulk_insert(rows); + start_bulk_insert(rows, flags); DBUG_VOID_RETURN; } int ha_end_bulk_insert() @@ -2940,7 +2940,7 @@ private: DBUG_ASSERT(!(ha_table_flags() & HA_CAN_REPAIR)); return HA_ADMIN_NOT_IMPLEMENTED; } - virtual void start_bulk_insert(ha_rows rows) {} + virtual void start_bulk_insert(ha_rows rows, uint flags) {} virtual int end_bulk_insert() { return 0; } virtual int index_read(uchar * buf, const uchar * key, uint key_len, enum ha_rkey_function find_flag) diff --git a/sql/item_sum.cc b/sql/item_sum.cc index b04fda55736..217e65e401f 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -636,13 +636,24 @@ void Item_sum::cleanup() @retval > 0 if key1 > key2 */ -static int simple_str_key_cmp(void* arg, uchar* key1, uchar* key2) +int simple_str_key_cmp(void* arg, uchar* key1, uchar* key2) { Field *f= (Field*) arg; return f->cmp(key1, key2); } +C_MODE_START + +int count_distinct_walk(void *elem, element_count count, void *arg) +{ + (*((ulonglong*)arg))++; + return 0; +} + +C_MODE_END + + /** Correctly compare composite keys. 
@@ -710,13 +721,13 @@ C_MODE_START /* Declarations for auxilary C-callbacks */ -static int simple_raw_key_cmp(void* arg, const void* key1, const void* key2) +int simple_raw_key_cmp(void* arg, const void* key1, const void* key2) { return memcmp(key1, key2, *(uint *) arg); } -static int item_sum_distinct_walk(void *element, element_count num_of_dups, +int item_sum_distinct_walk(void *element, element_count num_of_dups, void *item) { return ((Aggregator_distinct*) (item))->unique_walk_function(element); diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index 12f1f7c5519..e6cbed7eb13 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -1201,7 +1201,7 @@ bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf, uint parts= my_count_bits(key_tuple_map); ulong rpc; ulonglong rowids_size= rowid_buf_elem_size; - if ((rpc= key_info->rec_per_key[parts - 1])) + if ((rpc= key_info->actual_rec_per_key(parts - 1))) rowids_size= rowid_buf_elem_size * rpc; double fraction_for_rowids= diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 8a873257090..2507b29baca 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -493,6 +493,7 @@ ulong executed_events=0; query_id_t global_query_id; my_atomic_rwlock_t global_query_id_lock; my_atomic_rwlock_t thread_running_lock; +my_atomic_rwlock_t statistics_lock; ulong aborted_threads, aborted_connects; ulong delayed_insert_timeout, delayed_insert_limit, delayed_queue_size; ulong delayed_insert_threads, delayed_insert_writes, delayed_rows_in_use; @@ -1908,6 +1909,7 @@ void clean_up(bool print_message) sys_var_end(); my_atomic_rwlock_destroy(&global_query_id_lock); my_atomic_rwlock_destroy(&thread_running_lock); + my_atomic_rwlock_destroy(&statistics_lock); mysql_mutex_lock(&LOCK_thread_count); DBUG_PRINT("quit", ("got thread count lock")); ready_to_exit=1; @@ -2486,21 +2488,6 @@ void dec_connection_count(THD *thd) /* - Delete the THD object and decrease number of threads - - SYNOPSIS - delete_thd() - thd Thread handler -*/ - -void delete_thd(THD *thd) -{ - thread_count--; - delete thd; -} - - -/* Unlink thd from global list of available connections and free thd SYNOPSIS @@ -2518,14 +2505,23 @@ void unlink_thd(THD *thd) thd_cleanup(thd); dec_connection_count(thd); + + mysql_mutex_lock(&LOCK_status); + add_to_status(&global_status_var, &thd->status_var); + mysql_mutex_unlock(&LOCK_status); + mysql_mutex_lock(&LOCK_thread_count); + thread_count--; + thd->unlink(); /* Used by binlog_reset_master. It would be cleaner to use DEBUG_SYNC here, but that's not possible because the THD's debug sync feature has been shut down at this point. */ DBUG_EXECUTE_IF("sleep_after_lock_thread_count_before_delete_thd", sleep(5);); - delete_thd(thd); + mysql_mutex_unlock(&LOCK_thread_count); + + delete thd; DBUG_VOID_RETURN; } @@ -2629,10 +2625,13 @@ bool one_thread_per_connection_end(THD *thd, bool put_in_cache) /* Mark that current_thd is not valid anymore */ my_pthread_setspecific_ptr(THR_THD, 0); if (put_in_cache) + { + mysql_mutex_lock(&LOCK_thread_count); put_in_cache= cache_thread(); - mysql_mutex_unlock(&LOCK_thread_count); - if (put_in_cache) - DBUG_RETURN(0); // Thread is reused + mysql_mutex_unlock(&LOCK_thread_count); + if (put_in_cache) + DBUG_RETURN(0); // Thread is reused + } /* It's safe to broadcast outside a lock (COND... 
is not deleted here) */ DBUG_PRINT("signal", ("Broadcasting COND_thread_count")); @@ -6222,6 +6221,7 @@ error: */ struct my_option my_long_options[]= + { {"help", '?', "Display this help and exit.", &opt_help, &opt_help, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, @@ -7553,6 +7553,7 @@ static int mysql_init_variables(void) global_query_id= thread_id= 1L; my_atomic_rwlock_init(&global_query_id_lock); my_atomic_rwlock_init(&thread_running_lock); + my_atomic_rwlock_init(&statistics_lock); strmov(server_version, MYSQL_SERVER_VERSION); threads.empty(); thread_cache.empty(); diff --git a/sql/mysqld.h b/sql/mysqld.h index a07efa9178f..e0efbe60390 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -107,6 +107,7 @@ extern char* opt_secure_backup_file_priv; extern size_t opt_secure_backup_file_priv_len; extern my_bool opt_log_slow_admin_statements, opt_log_slow_slave_statements; extern my_bool sp_automatic_privileges, opt_noacl; +extern ulong use_stat_tables; extern my_bool opt_old_style_user_limits, trust_function_creators; extern uint opt_crash_binlog_innodb; extern char *shared_memory_base_name, *mysqld_unix_port; @@ -573,6 +574,7 @@ enum enum_query_type typedef int64 query_id_t; extern query_id_t global_query_id; extern my_atomic_rwlock_t global_query_id_lock; +extern my_atomic_rwlock_t statistics_lock; void unireg_end(void) __attribute__((noreturn)); diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 8aeb89ed9ed..72eb8460496 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -2926,7 +2926,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu", (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables, (ulong) const_tables)); - DBUG_PRINT("info", ("records: %lu", (ulong) head->file->stats.records)); + DBUG_PRINT("info", ("records: %lu", (ulong) head->stat_records())); delete quick; quick=0; needed_reg.clear_all(); @@ -2934,7 +2934,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, DBUG_ASSERT(!head->is_filled_at_execution()); if (keys_to_use.is_clear_all() || head->is_filled_at_execution()) DBUG_RETURN(0); - records= head->file->stats.records; + records= head->stat_records(); if (!records) records++; /* purecov: inspected */ scan_time= (double) records / TIME_FOR_COMPARE + 1; @@ -3071,7 +3071,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, if (group_trp) { param.table->quick_condition_rows= min(group_trp->records, - head->file->stats.records); + head->stat_records()); if (group_trp->read_cost < best_read_time) { best_trp= group_trp; @@ -4677,7 +4677,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, DBUG_PRINT("info", ("index_merge scans cost %g", imerge_cost)); if (imerge_too_expensive || (imerge_cost > read_time) || ((non_cpk_scan_records+cpk_scan_records >= - param->table->file->stats.records) && + param->table->stat_records()) && read_time != DBL_MAX)) { /* @@ -4748,7 +4748,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, imerge_trp->read_cost= imerge_cost; imerge_trp->records= non_cpk_scan_records + cpk_scan_records; imerge_trp->records= min(imerge_trp->records, - param->table->file->stats.records); + param->table->stat_records()); imerge_trp->range_scans= range_scans; imerge_trp->range_scans_end= range_scans + n_child_scans; read_time= imerge_cost; @@ -4819,7 +4819,7 @@ skip_to_ror_scan: ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs; roru_total_records += (*cur_roru_plan)->records; 
roru_intersect_part *= (*cur_roru_plan)->records / - param->table->file->stats.records; + param->table->stat_records(); } /* @@ -4829,7 +4829,7 @@ skip_to_ror_scan: in disjunction do not share key parts. */ roru_total_records -= (ha_rows)(roru_intersect_part* - param->table->file->stats.records); + param->table->stat_records()); /* ok, got a ROR read plan for each of the disjuncts Calculate cost: cost(index_union_scan(scan_1, ... scan_n)) = @@ -5106,12 +5106,12 @@ static inline ha_rows get_table_cardinality_for_index_intersect(TABLE *table) { if (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) - return table->file->stats.records; + return table->stat_records(); else { ha_rows d; double q; - for (q= (double)table->file->stats.records, d= 1 ; q >= 10; q/= 10, d*= 10 ) ; + for (q= (double)table->stat_records(), d= 1 ; q >= 10; q/= 10, d*= 10 ) ; return (ha_rows) (floor(q+0.5) * d); } } @@ -5514,9 +5514,8 @@ ha_rows records_in_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, ha_rows ext_records= ext_index_scan->records; if (i < used_key_parts) { - ulong *rec_per_key= key_info->rec_per_key+i-1; - ulong f1= rec_per_key[0] ? rec_per_key[0] : 1; - ulong f2= rec_per_key[1] ? rec_per_key[1] : 1; + ulong f1= key_info->actual_rec_per_key(i-1); + ulong f2= key_info->actual_rec_per_key(i); ext_records= (ha_rows) ((double) ext_records / f2 * f1); } if (ext_records < table_cardinality) @@ -6008,7 +6007,7 @@ ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param) info->is_covering= FALSE; info->index_scan_costs= 0.0; info->index_records= 0; - info->out_rows= (double) param->table->file->stats.records; + info->out_rows= (double) param->table->stat_records(); bitmap_clear_all(&info->covered_fields); return info; } @@ -6134,7 +6133,7 @@ static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info, min_range.flag= HA_READ_KEY_EXACT; max_range.key= key_val; max_range.flag= HA_READ_AFTER_KEY; - ha_rows prev_records= info->param->table->file->stats.records; + ha_rows prev_records= info->param->table->stat_records(); DBUG_ENTER("ror_scan_selectivity"); for (sel_arg= scan->sel_arg; sel_arg; @@ -6361,7 +6360,7 @@ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree, double min_cost= DBL_MAX; DBUG_ENTER("get_best_ror_intersect"); - if ((tree->n_ror_scans < 2) || !param->table->file->stats.records || + if ((tree->n_ror_scans < 2) || !param->table->stat_records() || !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT)) DBUG_RETURN(NULL); @@ -12659,14 +12658,14 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, double cpu_cost= 0; /* TODO: CPU cost of index_read calls? */ DBUG_ENTER("cost_group_min_max"); - table_records= table->file->stats.records; + table_records= table->stat_records(); keys_per_block= (table->file->stats.block_size / 2 / (index_info->key_length + table->file->ref_length) + 1); num_blocks= (uint)(table_records / keys_per_block) + 1; /* Compute the number of keys in a group. */ - keys_per_group= index_info->rec_per_key[group_key_parts - 1]; + keys_per_group= index_info->actual_rec_per_key(group_key_parts - 1); if (keys_per_group == 0) /* If there is no statistics try to guess */ /* each group contains 10% of all records */ keys_per_group= (uint)(table_records / 10) + 1; @@ -12686,7 +12685,7 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, Compute the probability that two ends of a subgroup are inside different blocks. 
*/ - keys_per_subgroup= index_info->rec_per_key[used_key_parts - 1]; + keys_per_subgroup= index_info->actual_rec_per_key(used_key_parts - 1); if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */ p_overlap= 1.0; /* a block, it will overlap at least two blocks. */ else diff --git a/sql/scheduler.cc b/sql/scheduler.cc index 0ae4121ef4c..54653557b16 100644 --- a/sql/scheduler.cc +++ b/sql/scheduler.cc @@ -35,7 +35,6 @@ static bool no_threads_end(THD *thd, bool put_in_cache) { unlink_thd(thd); - mysql_mutex_unlock(&LOCK_thread_count); return 1; // Abort handle_one_connection } diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index c65e56edbe0..e6bbef482a7 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -27,7 +27,9 @@ #include "sql_acl.h" // *_ACL #include "sp.h" // Sroutine_hash_entry #include "sql_parse.h" // check_table_access +#include "strfunc.h" #include "sql_admin.h" +#include "sql_statistics.h" /* Prepare, run and cleanup for mysql_recreate_table() */ @@ -320,7 +322,9 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, Protocol *protocol= thd->protocol; LEX *lex= thd->lex; int result_code; + int compl_result_code; bool need_repair_or_alter= 0; + DBUG_ENTER("mysql_admin_table"); DBUG_PRINT("enter", ("extra_open_options: %u", extra_open_options)); @@ -640,9 +644,92 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, } } - DBUG_PRINT("admin", ("calling operator_func '%s'", operator_name)); - result_code = (table->table->file->*operator_func)(thd, check_opt); - DBUG_PRINT("admin", ("operator_func returned: %d", result_code)); + result_code= compl_result_code= HA_ADMIN_OK; + + if (operator_func == &handler::ha_analyze) + { + TABLE *tab= table->table; + Field **field_ptr= tab->field; + + if (lex->with_persistent_for_clause && + tab->s->table_category != TABLE_CATEGORY_USER) + { + compl_result_code= result_code= HA_ADMIN_INVALID; + } + + if (!lex->column_list) + { + uint fields= 0; + for ( ; *field_ptr; field_ptr++, fields++) ; + bitmap_set_prefix(tab->read_set, fields); + } + else + { + int pos; + LEX_STRING *column_name; + List_iterator_fast<LEX_STRING> it(*lex->column_list); + + bitmap_clear_all(tab->read_set); + while ((column_name= it++)) + { + if (tab->s->fieldnames.type_names == 0 || + (pos= find_type(&tab->s->fieldnames, column_name->str, + column_name->length, 1)) <= 0) + { + compl_result_code= result_code= HA_ADMIN_INVALID; + break; + } + bitmap_set_bit(tab->read_set, pos-1); + } + tab->file->column_bitmaps_signal(); + } + + if (!lex->index_list) + { + tab->keys_in_use_for_query.init(tab->s->keys); + } + else + { + int pos; + LEX_STRING *index_name; + List_iterator_fast<LEX_STRING> it(*lex->index_list); + + tab->keys_in_use_for_query.clear_all(); + while ((index_name= it++)) + { + if (tab->s->keynames.type_names == 0 || + (pos= find_type(&tab->s->keynames, index_name->str, + index_name->length, 1)) <= 0) + { + compl_result_code= result_code= HA_ADMIN_INVALID; + break; + } + tab->keys_in_use_for_query.set_bit(--pos); + } + } + } + + if (result_code == HA_ADMIN_OK) + { + DBUG_PRINT("admin", ("calling operator_func '%s'", operator_name)); + result_code = (table->table->file->*operator_func)(thd, check_opt); + DBUG_PRINT("admin", ("operator_func returned: %d", result_code)); + } + + if (compl_result_code == HA_ADMIN_OK && + operator_func == &handler::ha_analyze && + table->table->s->table_category == TABLE_CATEGORY_USER && + (get_use_stat_tables_mode(thd) > NEVER || + lex->with_persistent_for_clause)) + { + if (!(compl_result_code= + 
alloc_statistics_for_table(thd, table->table)) && + !(compl_result_code= + collect_statistics_for_table(thd, table->table))) + compl_result_code= update_statistics_for_table(thd, table->table); + if (compl_result_code) + result_code= HA_ADMIN_FAILED; + } if (result_code == HA_ADMIN_NOT_IMPLEMENTED && need_repair_or_alter) { diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 834e5ac0046..ec0438946c6 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -49,6 +49,7 @@ #include "sql_trigger.h" #include "transaction.h" #include "sql_prepare.h" +#include "sql_statistics.h" #include <m_ctype.h> #include <my_dir.h> #include <hash.h> @@ -2417,10 +2418,11 @@ void drop_open_table(THD *thd, TABLE *table, const char *db_name, Check that table exists in table definition cache, on disk or in some storage engine. - @param thd Thread context - @param table Table list element - @param[out] exists Out parameter which is set to TRUE if table - exists and to FALSE otherwise. + @param thd Thread context + @param table Table list element + @param fast_check Check only if share or .frm file exists + @param[out] exists Out parameter which is set to TRUE if table + exists and to FALSE otherwise. @note This function acquires LOCK_open internally. @@ -2432,7 +2434,8 @@ void drop_open_table(THD *thd, TABLE *table, const char *db_name, @retval FALSE No error. 'exists' out parameter set accordingly. */ -bool check_if_table_exists(THD *thd, TABLE_LIST *table, bool *exists) +bool check_if_table_exists(THD *thd, TABLE_LIST *table, bool fast_check, + bool *exists) { char path[FN_REFLEN + 1]; TABLE_SHARE *share; @@ -2440,7 +2443,8 @@ bool check_if_table_exists(THD *thd, TABLE_LIST *table, bool *exists) *exists= TRUE; - DBUG_ASSERT(thd->mdl_context. + DBUG_ASSERT(fast_check || + thd->mdl_context. is_lock_owner(MDL_key::TABLE, table->db, table->table_name, MDL_SHARED)); @@ -2457,6 +2461,12 @@ bool check_if_table_exists(THD *thd, TABLE_LIST *table, bool *exists) if (!access(path, F_OK)) goto end; + if (fast_check) + { + *exists= FALSE; + goto end; + } + /* .FRM file doesn't exist. Check if some engine can provide it. 
*/ if (ha_check_if_table_exists(thd, table->db, table->table_name, exists)) { @@ -3021,7 +3031,7 @@ bool open_table(THD *thd, TABLE_LIST *table_list, MEM_ROOT *mem_root, { bool exists; - if (check_if_table_exists(thd, table_list, &exists)) + if (check_if_table_exists(thd, table_list, 0, &exists)) DBUG_RETURN(TRUE); if (!exists) @@ -4670,6 +4680,31 @@ open_and_process_table(THD *thd, LEX *lex, TABLE_LIST *tables, goto end; } + if (get_use_stat_tables_mode(thd) > NEVER && tables->table) + { + TABLE_SHARE *table_share= tables->table->s; + if (table_share && table_share->table_category == TABLE_CATEGORY_USER && + table_share->tmp_table == NO_TMP_TABLE) + { + if (table_share->stats_cb.stats_can_be_read || + !alloc_statistics_for_table_share(thd, table_share, FALSE)) + { + if (table_share->stats_cb.stats_can_be_read) + { + KEY *key_info= table_share->key_info; + KEY *key_info_end= key_info + table_share->keys; + KEY *table_key_info= tables->table->key_info; + for ( ; key_info < key_info_end; key_info++, table_key_info++) + table_key_info->read_stats= key_info->read_stats; + Field **field_ptr= table_share->field; + Field **table_field_ptr= tables->table->field; + for ( ; *field_ptr; field_ptr++, table_field_ptr++) + (*table_field_ptr)->read_stats= (*field_ptr)->read_stats; + } + } + } + } + process_view_routines: /* Again we may need cache all routines used by this view and add @@ -4718,7 +4753,18 @@ extern "C" uchar *schema_set_get_key(const uchar *record, size_t *length, open, see open_table() description for details. @retval FALSE Success. - @retval TRUE Failure (e.g. connection was killed) + @retval TRUE Failure (e.g. connection was killed) or table existed + for a CREATE TABLE. + + @notes + In case of CREATE TABLE we avoid a wait for tables that are in use + by first trying to do a meta data lock with timeout == 0. If we get a + timeout we will check if table exists (it should) and retry with + normal timeout if it didn't exists. + Note that for CREATE TABLE IF EXISTS we only generate a warning + but still return TRUE (to abort the calling open_table() function). + On must check THD->is_error() if one wants to distinguish between warning + and error. */ bool @@ -4730,6 +4776,10 @@ lock_table_names(THD *thd, TABLE_LIST *table; MDL_request global_request; Hash_set<TABLE_LIST, schema_set_get_key> schema_set; + ulong org_lock_wait_timeout= lock_wait_timeout; + /* Check if we are using CREATE TABLE ... IF NOT EXISTS */ + bool create_table; + Dummy_error_handler error_handler; DBUG_ENTER("lock_table_names"); DBUG_ASSERT(!thd->locked_tables_mode); @@ -4760,8 +4810,14 @@ lock_table_names(THD *thd, } } - if (! (flags & MYSQL_OPEN_SKIP_SCOPED_MDL_LOCK) && - ! 
mdl_requests.is_empty()) + if (mdl_requests.is_empty()) + DBUG_RETURN(FALSE); + + /* Check if CREATE TABLE IF NOT EXISTS was used */ + create_table= (tables_start && tables_start->open_strategy == + TABLE_LIST::OPEN_IF_EXISTS); + + if (!(flags & MYSQL_OPEN_SKIP_SCOPED_MDL_LOCK)) { /* Scoped locks: Take intention exclusive locks on all involved @@ -4789,12 +4845,58 @@ lock_table_names(THD *thd, global_request.init(MDL_key::GLOBAL, "", "", MDL_INTENTION_EXCLUSIVE, MDL_STATEMENT); mdl_requests.push_front(&global_request); + + if (create_table) + lock_wait_timeout= 0; // Don't wait for timeout } - if (thd->mdl_context.acquire_locks(&mdl_requests, lock_wait_timeout)) - DBUG_RETURN(TRUE); + for (;;) + { + bool exists= TRUE; + bool res; - DBUG_RETURN(FALSE); + if (create_table) + thd->push_internal_handler(&error_handler); // Avoid warnings & errors + res= thd->mdl_context.acquire_locks(&mdl_requests, lock_wait_timeout); + if (create_table) + thd->pop_internal_handler(); + if (!res) + DBUG_RETURN(FALSE); // Got locks + + if (!create_table) + DBUG_RETURN(TRUE); // Return original error + + /* + We come here in the case of lock timeout when executing + CREATE TABLE IF NOT EXISTS. + Verify that table really exists (it should as we got a lock conflict) + */ + if (check_if_table_exists(thd, tables_start, 1, &exists)) + DBUG_RETURN(TRUE); // Should never happen + if (exists) + { + if (thd->lex->create_info.options & HA_LEX_CREATE_IF_NOT_EXISTS) + { + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, + ER_TABLE_EXISTS_ERROR, ER(ER_TABLE_EXISTS_ERROR), + tables_start->table_name); + } + else + my_error(ER_TABLE_EXISTS_ERROR, MYF(0), tables_start->table_name); + DBUG_RETURN(TRUE); + } + /* purecov: begin inspected */ + /* + We got error from acquire_locks but table didn't exists. + In theory this should never happen, except maybe in + CREATE or DROP DATABASE scenario. + We play safe and restart the original acquire_locks with the + original timeout + */ + create_table= 0; + lock_wait_timeout= org_lock_wait_timeout; + /* purecov: end */ + } } @@ -4916,7 +5018,7 @@ bool open_tables(THD *thd, TABLE_LIST **start, uint *counter, uint flags, } /* - Initialize temporary MEM_ROOT for new .FRM parsing. Do not allocate + Initialize temporary MEM_ROOT for new .FRM parsing. Do not alloctaate anything yet, to avoid penalty for statements which don't use views and thus new .FRM format. */ @@ -5624,6 +5726,8 @@ bool open_and_lock_tables(THD *thd, TABLE_LIST *tables, if (lock_tables(thd, tables, counter, flags)) goto err; + (void) read_statistics_for_tables_if_needed(thd, tables); + if (derived) { if (mysql_handle_derived(thd->lex, DT_INIT)) @@ -9631,6 +9735,12 @@ has_write_table_auto_increment_not_first_in_pk(TABLE_LIST *tables) must call close_system_tables() to close systems tables opened with this call. + NOTES + In some situations we use this function to open system tables for + writing. It happens, for examples, with statistical tables when + they are updated by an ANALYZE command. In these cases we should + guarantee that system tables will not be deadlocked. 
+ RETURN FALSE Success TRUE Error diff --git a/sql/sql_base.h b/sql/sql_base.h index af63aefa7f9..5bbccc87cdc 100644 --- a/sql/sql_base.h +++ b/sql/sql_base.h @@ -302,11 +302,20 @@ TABLE *find_table_for_mdl_upgrade(THD *thd, const char *db, const char *table_name, bool no_error); void mark_tmp_table_for_reuse(TABLE *table); -bool check_if_table_exists(THD *thd, TABLE_LIST *table, bool *exists); +bool check_if_table_exists(THD *thd, TABLE_LIST *table, bool fast_check, + bool *exists); int update_virtual_fields(THD *thd, TABLE *table, enum enum_vcol_update_mode vcol_update_mode= VCOL_UPDATE_FOR_READ); int dynamic_column_error_message(enum_dyncol_func_result rc); +/* open_and_lock_tables with optional derived handling */ +int open_and_lock_tables_derived(THD *thd, TABLE_LIST *tables, bool derived); + +extern "C" int simple_raw_key_cmp(void* arg, const void* key1, + const void* key2); +extern "C" int count_distinct_walk(void *elem, element_count count, void *arg); +int simple_str_key_cmp(void* arg, uchar* key1, uchar* key2); + extern TABLE *unused_tables; extern Item **not_found_item; extern Field *not_found_field; @@ -473,7 +482,6 @@ open_tables(THD *thd, TABLE_LIST **tables, uint *counter, uint flags) return open_tables(thd, tables, counter, flags, &prelocking_strategy); } - inline TABLE *open_n_lock_single_table(THD *thd, TABLE_LIST *table_l, thr_lock_type lock_type, uint flags) { diff --git a/sql/sql_class.cc b/sql/sql_class.cc index cbb798d96a4..0709509a740 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1498,7 +1498,6 @@ THD::~THD() mysql_mutex_lock(&LOCK_thd_data); mysys_var=0; // Safety (shouldn't be needed) mysql_mutex_unlock(&LOCK_thd_data); - add_to_status(&global_status_var, &status_var); /* Close connection */ #ifndef EMBEDDED_LIBRARY diff --git a/sql/sql_class.h b/sql/sql_class.h index 85ebccabf54..c50d8c9aba6 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -511,6 +511,7 @@ typedef struct system_variables ulong net_write_timeout; ulong optimizer_prune_level; ulong optimizer_search_depth; + ulong use_stat_tables; ulong preload_buff_size; ulong profiling_history_size; ulong read_buff_size; diff --git a/sql/sql_db.cc b/sql/sql_db.cc index 5704b6d51b6..dc8a2e9f057 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -36,6 +36,7 @@ #include "sp.h" #include "events.h" #include "sql_handler.h" +#include "sql_statistics.h" #include <my_dir.h> #include <m_ctype.h> #include "log.h" @@ -816,6 +817,17 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) lock_db_routines(thd, db)) goto exit; + if (!in_bootstrap) + { + for (table= tables; table; table= table->next_local) + { + LEX_STRING db_name= { table->db, table->db_length }; + LEX_STRING table_name= { table->table_name, table->table_name_length }; + if (table->open_type == OT_BASE_ONLY || !find_temporary_table(thd, table)) + (void) delete_statistics_for_table(thd, &db_name, &table_name); + } + } + /* mysql_ha_rm_tables() requires a non-null TABLE_LIST. */ if (tables) mysql_ha_rm_tables(thd, tables); @@ -927,7 +939,7 @@ update_binlog: char quoted_name[FN_REFLEN+3]; // Only write drop table to the binlog for tables that no longer exist. 
- if (check_if_table_exists(thd, tbl, &exists)) + if (check_if_table_exists(thd, tbl, 0, &exists)) { error= true; goto exit; diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 09ddedecc75..8a1b5f46bce 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -35,6 +35,7 @@ #include "sql_select.h" #include "sp_head.h" #include "sql_trigger.h" +#include "sql_statistics.h" #include "transaction.h" #include "records.h" // init_read_record, #include "sql_derived.h" // mysql_handle_list_of_derived @@ -200,6 +201,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, #endif /* Update the table->file->stats.records number */ table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + set_statistics_for_table(thd, table); table->covering_keys.clear_all(); table->quick_keys.clear_all(); // Can't use 'only index' diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index 1f375bf1a03..07266cc9cbe 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -3812,7 +3812,8 @@ uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(ulong recno) uint incr= 0; TABLE_REF *ref= &join_tab->ref; TABLE *tab= join_tab->table; - uint rec_per_key= tab->key_info[ref->key].rec_per_key[ref->key_parts-1]; + uint rec_per_key= + tab->key_info[ref->key].actual_rec_per_key(ref->key_parts-1); set_if_bigger(rec_per_key, 1); if (recno == 1) incr= ref->key_length + tab->file->ref_length; diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 2c9a55208fc..1c07f0e23c2 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -458,6 +458,9 @@ void lex_start(THD *thd) lex->set_var_list.empty(); lex->param_list.empty(); lex->view_list.empty(); + lex->with_persistent_for_clause= FALSE; + lex->column_list= NULL; + lex->index_list= NULL; lex->prepared_stmt_params.empty(); lex->auxiliary_table_list.empty(); lex->unit.next= lex->unit.master= diff --git a/sql/sql_lex.h b/sql/sql_lex.h index fd789143c94..2390f207aa4 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -2332,6 +2332,8 @@ struct LEX: public Query_tables_list List<Item_func_set_user_var> set_var_list; // in-query assignment list List<Item_param> param_list; List<LEX_STRING> view_list; // view list (list of field names in view) + List<LEX_STRING> *column_list; // list of column names (in ANALYZE) + List<LEX_STRING> *index_list; // list of index names (in ANALYZE) /* A stack of name resolution contexts for the query. This stack is used at parse time to set local name resolution contexts for various parts @@ -2392,6 +2394,7 @@ struct LEX: public Query_tables_list this command. */ bool parse_vcol_expr; + bool with_persistent_for_clause; // uses PERSISTENT FOR clause (in ANALYZE) enum SSL_type ssl_type; /* defined in violite.h */ enum enum_duplicates duplicates; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 14601f99138..0690810a6dc 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -2826,7 +2826,14 @@ case SQLCOM_PREPARE: goto end_with_restore_list; } - if (!(res= open_and_lock_tables(thd, lex->query_tables, TRUE, 0))) + res= open_and_lock_tables(thd, lex->query_tables, TRUE, 0); + if (res) + { + /* Got error or warning. Set res to 1 if error */ + if (!(res= thd->is_error())) + my_ok(thd); // CREATE ... 
IF NOT EXISTS + } + else { /* The table already exists */ if (create_table->table) diff --git a/sql/sql_priv.h b/sql/sql_priv.h index 762a7769acf..39976e1a430 100644 --- a/sql/sql_priv.h +++ b/sql/sql_priv.h @@ -327,6 +327,7 @@ enum enum_yes_no_unknown }; #ifdef MYSQL_SERVER + /* External variables */ diff --git a/sql/sql_rename.cc b/sql/sql_rename.cc index 6b0d1e980f9..c91623cee6e 100644 --- a/sql/sql_rename.cc +++ b/sql/sql_rename.cc @@ -28,6 +28,7 @@ #include "lock.h" // MYSQL_OPEN_SKIP_TEMPORARY #include "sql_base.h" // tdc_remove_table, lock_table_names, #include "sql_handler.h" // mysql_ha_rm_tables +#include "sql_statistics.h" #include "datadict.h" static TABLE_LIST *rename_tables(THD *thd, TABLE_LIST *table_list, @@ -279,6 +280,12 @@ do_rename(THD *thd, TABLE_LIST *ren_table, char *new_db, char *new_table_name, ren_table->db, old_alias, new_db, new_alias, 0))) { + LEX_STRING db_name= { ren_table->db, ren_table->db_length }; + LEX_STRING table_name= { ren_table->table_name, + ren_table->table_name_length }; + LEX_STRING new_table= { (char *) new_alias, strlen(new_alias) }; + (void) rename_table_in_stat_tables(thd, &db_name, &table_name, + &db_name, &new_table); if ((rc= Table_triggers_list::change_table_name(thd, ren_table->db, old_alias, ren_table->table_name, diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 90302897a49..86a4168867b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -51,6 +51,7 @@ #include "opt_subselect.h" #include "log_slow.h" #include "sql_derived.h" +#include "sql_statistics.h" #include "debug_sync.h" // DEBUG_SYNC #include <m_ctype.h> @@ -3316,6 +3317,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, table_vector[i]=s->table=table=tables->table; table->pos_in_table_list= tables; error= tables->fetch_number_of_rows(); + set_statistics_for_table(join->thd, table); #ifdef WITH_PARTITION_STORAGE_ENGINE const bool no_partitions_used= table->no_partitions_used; @@ -3347,8 +3349,8 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, s->dependent= tables->dep_tables; if (tables->schema_table) - table->file->stats.records= 2; - table->quick_condition_rows= table->file->stats.records; + table->file->stats.records= table->used_stat_records= 2; + table->quick_condition_rows= table->stat_records(); s->on_expr_ref= &tables->on_expr; if (*s->on_expr_ref) @@ -3581,7 +3583,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, // All dep. 
must be constants if (s->dependent & ~(found_const_table_map)) continue; - if (table->file->stats.records <= 1L && + if (table->stat_records() <= 1L && (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && !table->pos_in_table_list->embedding && !((outer_join & table->map) && @@ -5550,7 +5552,7 @@ best_access_path(JOIN *join, else { uint key_parts= table->actual_n_key_parts(keyinfo); - if (!(records=keyinfo->rec_per_key[key_parts-1])) + if (!(records= keyinfo->actual_rec_per_key(key_parts-1))) { /* Prefer longer keys */ records= ((double) s->records / (double) rec * @@ -5650,7 +5652,7 @@ best_access_path(JOIN *join, else { /* Check if we have statistic about the distribution */ - if ((records= keyinfo->rec_per_key[max_key_part-1])) + if ((records= keyinfo->actual_rec_per_key(max_key_part-1))) { /* Fix for the case where the index statistics is too @@ -7467,10 +7469,10 @@ JOIN_TAB *next_breadth_first_tab(JOIN *join, enum enum_exec_or_opt tabs_kind, } -JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const) +JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables const_tbls) { JOIN_TAB *tab= join->join_tab; - if (with_const == WITH_CONST_TABLES) + if (const_tbls == WITHOUT_CONST_TABLES) { if (join->const_tables == join->table_count) return NULL; @@ -7866,6 +7868,7 @@ static bool create_hj_key_for_table(JOIN *join, JOIN_TAB *join_tab, keyinfo->key_length=0; keyinfo->algorithm= HA_KEY_ALG_UNDEF; keyinfo->flags= HA_GENERATED_KEY; + keyinfo->is_statistics_from_stat_tables= FALSE; keyinfo->name= (char *) "$hj"; keyinfo->rec_per_key= (ulong*) thd->calloc(sizeof(ulong)*key_parts); if (!keyinfo->rec_per_key) @@ -10439,7 +10442,7 @@ double JOIN_TAB::scan_time() } else { - found_records= records= table->file->stats.records; + found_records= records= table->stat_records(); read_time= table->file->scan_time(); /* table->quick_condition_rows has already been set to @@ -10450,7 +10453,7 @@ double JOIN_TAB::scan_time() } else { - found_records= records=table->file->stats.records; + found_records= records=table->stat_records(); read_time= found_records ? (double)found_records: 10.0;// TODO:fix this stub res= read_time; } @@ -10491,7 +10494,7 @@ ha_rows JOIN_TAB::get_examined_rows() handler->info(HA_STATUS_VARIABLE) has been called in make_join_statistics() */ - examined_rows= table->file->stats.records; + examined_rows= table->stat_records(); } } } @@ -14851,8 +14854,11 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts; keyinfo->ext_key_parts= keyinfo->key_parts; keyinfo->key_length=0; - keyinfo->rec_per_key=0; + keyinfo->rec_per_key=NULL; + keyinfo->read_stats= NULL; + keyinfo->collected_stats= NULL; keyinfo->algorithm= HA_KEY_ALG_UNDEF; + keyinfo->is_statistics_from_stat_tables= FALSE; keyinfo->name= (char*) "group_key"; ORDER *cur_group= group; for (; cur_group ; cur_group= cur_group->next, key_part_info++) @@ -14965,7 +14971,10 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields, keyinfo->key_length= 0; // Will compute the sum of the parts below. 
keyinfo->name= (char*) "distinct_key"; keyinfo->algorithm= HA_KEY_ALG_UNDEF; + keyinfo->is_statistics_from_stat_tables= FALSE; keyinfo->rec_per_key=0; + keyinfo->read_stats= NULL; + keyinfo->collected_stats= NULL; /* Create an extra field to hold NULL bits so that unique indexes on @@ -18764,7 +18773,7 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, uint saved_best_key_parts= 0; int best_key_direction= 0; JOIN *join= tab->join; - ha_rows table_records= table->file->stats.records; + ha_rows table_records= table->stat_records(); test_if_cheaper_ordering(tab, order, table, usable_keys, ref_key, select_limit, @@ -18879,7 +18888,7 @@ check_reverse_order: { tab->ref.key= -1; tab->ref.key_parts= 0; - if (select_limit < table->file->stats.records) + if (select_limit < table->stat_records()) tab->limit= select_limit; table->disable_keyread(); } @@ -23070,7 +23079,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, int best_key= -1; bool is_best_covering= FALSE; double fanout= 1; - ha_rows table_records= table->file->stats.records; + ha_rows table_records= table->stat_records(); bool group= join && join->group && order == join->group_list; ha_rows ref_key_quick_rows= HA_POS_ERROR; const bool has_limit= (select_limit_arg != HA_POS_ERROR); @@ -23162,7 +23171,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, if (used_key_parts > used_index_parts) used_pk_parts= used_key_parts-used_index_parts; rec_per_key= used_key_parts ? - keyinfo->rec_per_key[used_key_parts-1] : 1; + keyinfo->actual_rec_per_key(used_key_parts-1) : 1; /* Take into account the selectivity of the used pk prefix */ if (used_pk_parts) { @@ -23177,8 +23186,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, rec_per_key= 1; if (rec_per_key > 1) { - rec_per_key*= pkinfo->rec_per_key[used_pk_parts-1]; - rec_per_key/= pkinfo->rec_per_key[0]; + rec_per_key*= pkinfo->actual_rec_per_key(used_pk_parts-1); + rec_per_key/= pkinfo->actual_rec_per_key(0); /* The value of rec_per_key for the extended key has to be adjusted accordingly if some components of @@ -23192,9 +23201,9 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, We presume here that for any index rec_per_key[i] != 0 if rec_per_key[0] != 0. */ - DBUG_ASSERT(pkinfo->rec_per_key[i]); - rec_per_key*= pkinfo->rec_per_key[i-1]; - rec_per_key/= pkinfo->rec_per_key[i]; + DBUG_ASSERT(pkinfo->actual_rec_per_key(i)); + rec_per_key*= pkinfo->actual_rec_per_key(i-1); + rec_per_key/= pkinfo->actual_rec_per_key(i); } } } @@ -23239,7 +23248,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, select_limit= (ha_rows) (select_limit * (double) table_records / table->quick_condition_rows); - rec_per_key= keyinfo->rec_per_key[keyinfo->key_parts-1]; + rec_per_key= keyinfo->actual_rec_per_key(keyinfo->key_parts-1); set_if_bigger(rec_per_key, 1); /* Here we take into account the fact that rows are @@ -23380,7 +23389,7 @@ uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select, Update quick_condition_rows since single table UPDATE/DELETE procedures don't call make_join_statistics() and leave this variable uninitialized. 
*/ - table->quick_condition_rows= table->file->stats.records; + table->quick_condition_rows= table->stat_records(); int key, direction; if (test_if_cheaper_ordering(NULL, order, table, diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 4d251aa1c81..1ef3ce2e40a 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -45,6 +45,7 @@ #include "set_var.h" #include "sql_trigger.h" #include "sql_derived.h" +#include "sql_statistics.h" #include "sql_connect.h" #include "authors.h" #include "contributors.h" @@ -3388,13 +3389,13 @@ bool get_lookup_value(THD *thd, Item_func *item_func, Item_field *item_field; CHARSET_INFO *cs= system_charset_info; - if (item_func->arguments()[0]->type() == Item::FIELD_ITEM && + if (item_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && item_func->arguments()[1]->const_item()) { idx_field= 0; idx_val= 1; } - else if (item_func->arguments()[1]->type() == Item::FIELD_ITEM && + else if (item_func->arguments()[1]->real_item()->type() == Item::FIELD_ITEM && item_func->arguments()[0]->const_item()) { idx_field= 1; @@ -3403,7 +3404,7 @@ bool get_lookup_value(THD *thd, Item_func *item_func, else return 0; - item_field= (Item_field*) item_func->arguments()[idx_field]; + item_field= (Item_field*) item_func->arguments()[idx_field]->real_item(); if (table->table != item_field->field->table) return 0; tmp_str= item_func->arguments()[idx_val]->val_str(&str_buff); @@ -5929,9 +5930,12 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables, TABLE *show_table= tables->table; KEY *key_info=show_table->s->key_info; if (show_table->file) + { show_table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | HA_STATUS_TIME); + set_statistics_for_table(thd, show_table); + } for (uint i=0 ; i < show_table->s->keys ; i++,key_info++) { KEY_PART_INFO *key_part= key_info->key_part; @@ -5962,8 +5966,8 @@ static int get_schema_stat_record(THD *thd, TABLE_LIST *tables, KEY *key=show_table->key_info+i; if (key->rec_per_key[j]) { - ha_rows records=(show_table->file->stats.records / - key->rec_per_key[j]); + ha_rows records=((double) show_table->stat_records() / + key->actual_rec_per_key(j)); table->field[9]->store((longlong) records, TRUE); table->field[9]->set_notnull(); } diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc new file mode 100644 index 00000000000..618fd9b7799 --- /dev/null +++ b/sql/sql_statistics.cc @@ -0,0 +1,3049 @@ +/* Copyright (C) 2009 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/** + @file + + @brief + functions to update persitent statistical tables and to read from them + + @defgroup Query_Optimizer Query Optimizer + @{ +*/ + +#include "sql_base.h" +#include "key.h" +#include "sql_statistics.h" +#include "my_atomic.h" + +/* + The system variable 'use_stat_tables' can take one of the + following values: + "never", "complementary", "preferably". 
+ If the value of the variable 'use_stat_tables' is set to
+ "never" then any statistical data from the persistent statistical tables
+ is ignored by the optimizer.
+ If the value of the variable 'use_stat_tables' is set to
+ "complementary" then a particular statistical characteristic is used
+ by the optimizer only if the database engine does not provide similar
+ statistics. For example, 'nulls_ratio' for table columns currently
+ is not provided by any engine. So the optimizer uses this statistical data
+ from the statistical tables. At the same time it does not use
+ 'avg_frequency' for any index prefix from the statistical tables since
+ a similar statistical characteristic 'records_per_key' can be
+ requested from the database engine.
+ If the value of the variable 'use_stat_tables' is set to
+ "preferably" the optimizer prefers the statistical data from the
+ statistical tables and uses the engine statistics only if the needed
+ data cannot be found in the statistical tables.
+ If an ANALYZE command is executed then it results in collecting
+ statistical data for the tables specified by the command and storing
+ the collected statistics in the persistent statistical tables only
+ when the value of the variable 'use_stat_tables' is not
+ equal to "never".
+*/
+
+/* Currently there are only 3 persistent statistical tables */
+static const uint STATISTICS_TABLES= 3;
+
+/*
+ The names of the statistical tables in this array must correspond to the
+ definitions of the tables in the file ../scripts/mysql_system_tables.sql
+*/
+static const LEX_STRING stat_table_name[STATISTICS_TABLES]=
+{
+ { C_STRING_WITH_LEN("table_stats") },
+ { C_STRING_WITH_LEN("column_stats") },
+ { C_STRING_WITH_LEN("index_stats") }
+};
+
+/* Name of the database to which the statistical tables belong */
+static const LEX_STRING stat_tables_db_name= { C_STRING_WITH_LEN("mysql") };
+
+
+/**
+ @details
+ The function builds a list of TABLE_LIST elements for the system statistical
+ tables using the array of TABLE_LIST structures passed as a parameter.
+ The lock type of each element is set to TL_READ if for_write = FALSE,
+ otherwise it is set to TL_WRITE.
+*/
+
+static
+inline void init_table_list_for_stat_tables(TABLE_LIST *tables, bool for_write)
+{
+ uint i;
+
+ memset((char *) &tables[0], 0, sizeof(TABLE_LIST) * STATISTICS_TABLES);
+
+ for (i= 0; i < STATISTICS_TABLES; i++)
+ {
+ tables[i].db= stat_tables_db_name.str;
+ tables[i].db_length= stat_tables_db_name.length;
+ tables[i].alias= tables[i].table_name= stat_table_name[i].str;
+ tables[i].table_name_length= stat_table_name[i].length;
+ tables[i].lock_type= for_write ? TL_WRITE : TL_READ;
+ if (i < STATISTICS_TABLES - 1)
+ tables[i].next_global= tables[i].next_local=
+ tables[i].next_name_resolution_table= &tables[i+1];
+ if (i != 0)
+ tables[i].prev_global= &tables[i-1].next_global;
+ }
+}
+
+
+/**
+ @details
+ The function builds a TABLE_LIST containing only one element 'tbl' for
+ the statistical table called 'stat_tab_name'.
+ The lock type of the element is set to TL_READ if for_write = FALSE,
+ otherwise it is set to TL_WRITE.
+*/
+
+static
+inline void init_table_list_for_single_stat_table(TABLE_LIST *tbl,
+ const LEX_STRING *stat_tab_name,
+ bool for_write)
+{
+ memset((char *) tbl, 0, sizeof(TABLE_LIST));
+
+ tbl->db= stat_tables_db_name.str;
+ tbl->db_length= stat_tables_db_name.length;
+ tbl->alias= tbl->table_name= stat_tab_name->str;
+ tbl->table_name_length= stat_tab_name->length;
+ tbl->lock_type= for_write ?
TL_WRITE : TL_READ; +} + + +/** + @brief + Open all statistical tables and lock them +*/ + +static +inline int open_stat_tables(THD *thd, TABLE_LIST *tables, + Open_tables_backup *backup, + bool for_write) +{ + init_table_list_for_stat_tables(tables, for_write); + init_mdl_requests(tables); + return open_system_tables_for_read(thd, tables, backup); +} + + +/** + @brief + Open a statistical table and lock it +*/ +static +inline int open_single_stat_table(THD *thd, TABLE_LIST *table, + const LEX_STRING *stat_tab_name, + Open_tables_backup *backup, + bool for_write) +{ + init_table_list_for_single_stat_table(table, stat_tab_name, for_write); + init_mdl_requests(table); + return open_system_tables_for_read(thd, table, backup); +} + + +/* + The class Column_statistics_collected is a helper class used to collect + statistics on a table column. The class is derived directly from + the class Column_statistics, and, additionally to the fields of the + latter, it contains the fields to accumulate the results of aggregation + for the number of nulls in the column and for the size of the column + values. There is also a container for distinct column values used + to calculate the average number of records per distinct column value. +*/ + +class Column_statistics_collected :public Column_statistics +{ + +private: + Field *column; /* The column to collect statistics on */ + ha_rows nulls; /* To accumulate the number of nulls in the column */ + ulonglong column_total_length; /* To accumulate the size of column values */ + Count_distinct_field *count_distinct; /* The container for distinct + column values */ + + bool is_single_pk_col; /* TRUE <-> the only column of the primary key */ + +public: + + inline void init(THD *thd, Field * table_field); + inline void add(ha_rows rowno); + inline void finish(ha_rows rows); + inline void cleanup(); +}; + + +/** + Stat_table is the base class for classes Table_stat, Column_stat and + Index_stat. The methods of these classes allow us to read statistical + data from statistical tables, write collected statistical data into + statistical tables and update statistical data in these tables + as well as update access fields belonging to the primary key and + delete records by prefixes of the primary key. + Objects of the classes Table_stat, Column_stat and Index stat are used + for reading/writing statistics from/into persistent tables table_stats, + column_stats and index_stats correspondingly. These tables are stored in + the system database 'mysql'. + + Statistics is read and written always for a given database table t. When + an object of any of these classes is created a pointer to the TABLE + structure for this database table is passed as a parameter to the constructor + of the object. The other parameter is a pointer to the TABLE structure for + the corresponding statistical table st. So construction of an object to + read/write statistical data on table t from/into statistical table st + requires both table t and st to be opened. + In some cases the TABLE structure for table t may be undefined. Then + the objects of the classes Table_stat, Column_stat and Index stat are + created by the alternative constructor that require only the name + of the table t and the name of the database it belongs to. Currently the + alternative constructors are used only in the cases when some records + belonging to the table are to be deleted, or its keys are to be updated + + Reading/writing statistical data from/into a statistical table is always + performed by a key. 
At the moment there is only one key defined for each + statistical table and this key is primary. + The primary key for the table table_stats is built as (db_name, table_name). + The primary key for the table column_stats is built as (db_name, table_name, + column_name). + The primary key for the table index_stats is built as (db_name, table_name, + index_name, prefix_arity). + + Reading statistical data from a statistical table is performed by the + following pattern. First a table dependent method sets the values of the + the fields that comprise the lookup key. Then an implementation of the + method get_stat_values() declared in Stat_table as a pure virtual method + finds the row from the statistical table by the set key. If the row is + found the values of statistical fields are read from this row and are + distributed in the internal structures. + + Let's assume the statistical data is read for table t from database db. + + When statistical data is searched in the table table_stats first + Table_stat::set_key_fields() should set the fields of db_name and + table_name. Then get_stat_values looks for a row by the set key value, + and, if the row is found, reads the value from the column + table_stats.cardinality into the field read_stat.cardinality of the TABLE + structure for table t and sets the value of read_stat.cardinality_is_null + from this structure to FALSE. If the value of the 'cardinality' column + in the row is null or if no row is found read_stat.cardinality_is_null + is set to TRUE. + + When statistical data is searched in the table column_stats first + Column_stat::set_key_fields() should set the fields of db_name, table_name + and column_name with column_name taken out of the only parameter f of the + Field* type passed to this method. After this get_stat_values looks + for a row by the set key value. If the row is found the values of statistical + data columns min_value, max_value, nulls_ratio, avg_length, avg_frequency + are read into internal structures. Values of nulls_ratio, avg_length, + avg_frequency are read into the corresponding fields of the read_stat + structure from the Field object f, while values from min_value and max_value + are copied into the min_value and max_value record buffers attached to the + TABLE structure for table t. + If the value of a statistical column in the found row is null, then the + corresponding flag in the f->read_stat.column_stat_nulls bitmap is set off. + Otherwise the flag is set on. If no row is found for the column the all flags + in f->column_stat_nulls are set off. + + When statistical data is searched in the table index_stats first + Index_stat::set_key_fields() has to be called to set the fields of db_name, + table_name, index_name and prefix_arity. The value of index_name is extracted + from the first parameter key_info of the KEY* type passed to the method. + This parameter specifies the index of interest idx. The second parameter + passed to the method specifies the arity k of the index prefix for which + statistical data is to be read. E.g. if the index idx consists of 3 + components (p1,p2,p3) the table index_stats usually will contain 3 rows for + this index: the first - for the prefix (p1), the second - for the prefix + (p1,p2), and the third - for the the prefix (p1,p2,p3). After the key fields + has been set a call of get_stat_value looks for a row by the set key value. + If the row is found and the value of the avg_frequency column is not null + then this value is assigned to key_info->read_stat.avg_frequency[k]. 
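To make the key layout just described concrete, here is a small standalone sketch (not code from this patch; every name in it is an invented stand-in) that models the three persistent tables as maps keyed exactly like their primary keys:

#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <tuple>

/* (db_name, table_name) -> cardinality; SQL NULL is modelled by std::nullopt */
using TableKey= std::tuple<std::string, std::string>;
/* (db_name, table_name, column_name) -> one row of column statistics */
using ColumnKey= std::tuple<std::string, std::string, std::string>;
/* (db_name, table_name, index_name, prefix_arity) -> avg_frequency */
using IndexKey= std::tuple<std::string, std::string, std::string, unsigned>;

struct ColumnRow
{
  std::optional<std::string> min_value, max_value;
  std::optional<double> nulls_ratio, avg_length, avg_frequency;
};

std::map<TableKey, std::optional<unsigned long long>> table_stats;
std::map<ColumnKey, ColumnRow> column_stats;
std::map<IndexKey, std::optional<double>> index_stats;

int main()
{
  table_stats[{"dbt", "t1"}]= 1000;                /* cardinality of dbt.t1 */
  column_stats[{"dbt", "t1", "a"}].nulls_ratio= 0.1;
  index_stats[{"dbt", "t1", "idx", 1u}]= 20.0;     /* avg_frequency of (p1) */
  index_stats[{"dbt", "t1", "idx", 2u}]= 4.0;      /* avg_frequency of (p1,p2) */

  /* Reading follows the described pattern: look the row up by its full primary
     key; an absent row or a NULL value means the statistic is unknown. */
  auto it= index_stats.find({"dbt", "t1", "idx", 2u});
  double avg_frequency= (it != index_stats.end() && it->second) ? *it->second : 0;
  std::cout << "avg_frequency for the 2-component prefix: " << avg_frequency << "\n";
}

The real classes perform the same lookups through the handler of the opened statistical tables (ha_index_read_idx_map) rather than through in-memory maps.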
+ Otherwise 0 is assigned to this element. + + The method Stat_table::update_stat is used to write statistical data + collected in the internal structures into a statistical table st. + It is assumed that before any invocation of this method a call of the + function st.set_key_fields has set the values of the primary key fields + that serve to locate the row from the statistical table st where the + the collected statistical data from internal structures are to be written + to. The statistical data is written from the counterparts of the + statistical fields of internal structures into which it would be read + by the functions get_stat_values. The counterpart fields are used + only when statistics is collected + When updating/inserting a row from the statistical table st the method + Stat_table::update_stat calls the implementation of the pure virtual + method store_field_values to transfer statistical data from the fields + of internal structures to the fields of record buffer used for updates + of the statistical table st. +*/ + +class Stat_table +{ + +private: + + /* Handler used for the retrieval of the statistical table stat_table */ + handler *stat_file; + + uint stat_key_length; /* Length of the key to access stat_table */ + uchar *record[2]; /* Record buffers used to access/update stat_table */ + uint stat_key_idx; /* The number of the key to access stat_table */ + + /* This is a helper function used only by the Stat_table constructors */ + void common_init_stat_table() + { + stat_file= stat_table->file; + /* Currently any statistical table has only one key */ + stat_key_idx= 0; + stat_key_info= &stat_table->key_info[stat_key_idx]; + stat_key_length= stat_key_info->key_length; + record[0]= stat_table->record[0]; + record[1]= stat_table->record[1]; + } + +protected: + + /* Statistical table to read statistics from or to update/delete */ + TABLE *stat_table; + KEY *stat_key_info; /* Structure for the index to access stat_table */ + + /* Table for which statistical data is read / updated */ + TABLE *table; + TABLE_SHARE *table_share; /* Table share for 'table */ + LEX_STRING *db_name; /* Name of the database containing 'table' */ + LEX_STRING *table_name; /* Name of the table 'table' */ + + void store_record_for_update() + { + store_record(stat_table, record[1]); + } + + void store_record_for_lookup() + { + store_record(stat_table, record[0]); + } + + bool update_record() + { + int err; + if ((err= stat_file->ha_update_row(record[1], record[0])) && + err != HA_ERR_RECORD_IS_THE_SAME) + return TRUE; + return FALSE; + } + +public: + + + /** + @details + This constructor has to be called by any constructor of the derived + classes. The constructor 'tunes' the private and protected members of + the constructed object to the statistical table 'stat_table' with the + statistical data of our interest and to the table 'tab' for which this + statistics has been collected. + */ + + Stat_table(TABLE *stat, TABLE *tab) + :stat_table(stat), table(tab) + { + table_share= tab->s; + common_init_stat_table(); + db_name= &table_share->db; + table_name= &table_share->table_name; + } + + + /** + @details + This constructor has to be called by any constructor of the derived + classes. The constructor 'tunes' the private and protected members of + the constructed object to the statistical table 'stat_table' with the + statistical data of our interest and to the table t for which this + statistics has been collected. The table t is uniquely specified + by the database name 'db' and the table name 'tab'. 
+ */ + + Stat_table(TABLE *stat, LEX_STRING *db, LEX_STRING *tab) + :stat_table(stat), table_share(NULL) + { + common_init_stat_table(); + db_name= db; + table_name= tab; + } + + + virtual ~Stat_table() {} + + /** + @brief + Store the given values of fields for database name and table name + + @details + This is a purely virtual method. + The implementation for any derived class shall store the given + values of the database name and table name in the corresponding + fields of stat_table. + + @note + The method is called by the update_table_name_key_parts function. + */ + + virtual void change_full_table_name(LEX_STRING *db, LEX_STRING *tab)= 0; + + + /** + @brief + Store statistical data into fields of the statistical table + + @details + This is a purely virtual method. + The implementation for any derived class shall put the appropriate + statistical data into the corresponding fields of stat_table. + + @note + The method is called by the update_stat function. + */ + + virtual void store_stat_fields()= 0; + + + /** + @brief + Read statistical data from fields of the statistical table + + @details + This is a purely virtual method. + The implementation for any derived read shall read the appropriate + statistical data from the corresponding fields of stat_table. + */ + + virtual void get_stat_values()= 0; + + + /** + @brief + Find a record in the statistical table by a primary key + + @details + The function looks for a record in stat_table by its primary key. + It assumes that the key fields have been already stored in the record + buffer of stat_table. + + @retval + FALSE the record is not found + @retval + TRUE the record is found + */ + + bool find_stat() + { + uchar key[MAX_KEY_LENGTH]; + key_copy(key, record[0], stat_key_info, stat_key_length); + return !stat_file->ha_index_read_idx_map(record[0], stat_key_idx, key, + HA_WHOLE_KEY, HA_READ_KEY_EXACT); + } + + + /** + @brief + Find a record in the statistical table by a key prefix value + + @details + The function looks for a record in stat_table by the key value consisting + of 'prefix_parts' major components for the primary index. + It assumes that the key prefix fields have been already stored in the record + buffer of stat_table. + + @retval + FALSE the record is not found + @retval + TRUE the record is found + */ + + bool find_next_stat_for_prefix(uint prefix_parts) + { + uchar key[MAX_KEY_LENGTH]; + uint prefix_key_length= 0; + for (uint i= 0; i < prefix_parts; i++) + prefix_key_length+= stat_key_info->key_part[i].store_length; + key_copy(key, record[0], stat_key_info, prefix_key_length); + key_part_map prefix_map= (key_part_map) ((1 << prefix_parts) - 1); + return !stat_file->ha_index_read_idx_map(record[0], stat_key_idx, key, + prefix_map, HA_READ_KEY_EXACT); + } + + + /** + @brief + Update/insert a record in the statistical table with new statistics + + @details + The function first looks for a record by its primary key in the statistical + table stat_table. If the record is found the function updates statistical + fields of the records. The data for these fields are taken from internal + structures containing info on the table 'table'. If the record is not + found the function inserts a new record with the primary key set to the + search key and the statistical data taken from the internal structures. + The function assumes that the key fields have been already stored in + the record buffer of stat_table. 
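The update-or-insert flow of update_stat() sketched here fits in a few lines when a std::map stands in for the statistical table and its handler (illustrative only; Key and Row are invented names):

#include <map>
#include <string>
#include <utility>

/* Invented stand-ins: Key plays the role of a statistical table's primary key,
   Row the role of its statistical columns. */
using Key= std::pair<std::string, std::string>;
struct Row { double cardinality; };

/* Mirrors the flow described above: find the row by its primary key; if it
   exists overwrite its statistical fields, otherwise insert a new row. */
bool update_stat(std::map<Key, Row> &stat_table, const Key &key, const Row &fresh)
{
  auto it= stat_table.find(key);            /* find_stat() */
  if (it != stat_table.end())
    it->second= fresh;                      /* store_stat_fields() + update_record() */
  else
    stat_table.emplace(key, fresh);         /* store_stat_fields() + ha_write_row() */
  return false;                             /* FALSE means success, as in the class */
}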
+ + @retval + FALSE success with the update/insert of the record + @retval + TRUE failure with the update/insert of the record + + @note + The function calls the virtual method store_stat_fields to populate the + statistical fields of the updated/inserted row with new statistics. + */ + + bool update_stat() + { + if (find_stat()) + { + store_record_for_update(); + store_stat_fields(); + return update_record(); + } + else + { + int err; + store_stat_fields(); + if ((err= stat_file->ha_write_row(record[0]))) + return TRUE; + } + return FALSE; + } + + + /** + @brief + Update the table name fields in the current record of stat_table + + @details + The function updates the fields containing database name and table name + for the last found record in the statistical table stat_table. + The corresponding names for update is taken from the parameters + db and tab. + + @retval + FALSE success with the update of the record + @retval + TRUE failure with the update of the record + + @note + The function calls the virtual method change_full_table_name + to store the new names in the record buffer used for updates. + */ + + bool update_table_name_key_parts(LEX_STRING *db, LEX_STRING *tab) + { + store_record_for_update(); + change_full_table_name(db, tab); + bool rc= update_record(); + store_record_for_lookup(); + return rc; + } + + + /** + @brief + Delete the current record of the statistical table stat_table + + @details + The function deletes the last found record from the statistical + table stat_table. + + @retval + FALSE success with the deletion of the record + @retval + TRUE failure with the deletion of the record + */ + + bool delete_stat() + { + int err; + if ((err= stat_file->ha_delete_row(record[0]))) + return TRUE; + return FALSE; + } +}; + + +/* + An object of the class Table_stat is created to read statistical + data on tables from the statistical table table_stats, to update + table_stats with such statistical data, or to update columns + of the primary key, or to delete the record by its primary key or + its prefix. + Rows from the statistical table are read and updated always by + primary key. +*/ + +class Table_stat: public Stat_table +{ + +private: + + Field *db_name_field; /* Field for the column table_stats.db_name */ + Field *table_name_field; /* Field for the column table_stats.table_name */ + + void common_init_table_stat() + { + db_name_field= stat_table->field[TABLE_STAT_DB_NAME]; + table_name_field= stat_table->field[TABLE_STAT_TABLE_NAME]; + } + + void change_full_table_name(LEX_STRING *db, LEX_STRING *tab) + { + db_name_field->store(db->str, db->length, system_charset_info); + table_name_field->store(tab->str, tab->length, system_charset_info); + } + +public: + + /** + @details + The constructor 'tunes' the private and protected members of the + constructed object for the statistical table table_stats to read/update + statistics on table 'tab'. The TABLE structure for the table table_stat + must be passed as a value for the parameter 'stat'. + */ + + Table_stat(TABLE *stat, TABLE *tab) :Stat_table(stat, tab) + { + common_init_table_stat(); + } + + + /** + @details + The constructor 'tunes' the private and protected members of the + object constructed for the statistical table table_stat for + the future updates/deletes of the record concerning the table 'tab' + from the database 'db'. 
+ */ + + Table_stat(TABLE *stat, LEX_STRING *db, LEX_STRING *tab) + :Stat_table(stat, db, tab) + { + common_init_table_stat(); + } + + + /** + @brief + Set the key fields for the statistical table table_stat + + @details + The function sets the values of the fields db_name and table_name + in the record buffer for the statistical table table_stat. + These fields comprise the primary key for the table. + + @note + The function is supposed to be called before any use of the + method find_stat for an object of the Table_stat class. + */ + + void set_key_fields() + { + db_name_field->store(db_name->str, db_name->length, system_charset_info); + table_name_field->store(table_name->str, table_name->length, + system_charset_info); + } + + + /** + @brief + Store statistical data into statistical fields of table_stat + + @details + This implementation of a purely virtual method sets the value of the + column 'cardinality' of the statistical table table_stat according to + the value of the flag write_stat.cardinality_is_null and the value of + the field write_stat.cardinality' from the TABLE structure for 'table'. + */ + + void store_stat_fields() + { + Field *stat_field= stat_table->field[TABLE_STAT_CARDINALITY]; + if (table->collected_stats->cardinality_is_null) + stat_field->set_null(); + else + { + stat_field->set_notnull(); + stat_field->store(table->collected_stats->cardinality); + } + } + + + /** + @brief + Read statistical data from statistical fields of table_stat + + @details + This implementation of a purely virtual method first looks for a record + the statistical table table_stat by its primary key set the record + buffer with the help of Table_stat::set_key_fields. Then, if the row is + found the function reads the value of the column 'cardinality' of the table + table_stat and sets the value of the flag read_stat.cardinality_is_null + and the value of the field read_stat.cardinality' from the TABLE structure + for 'table' accordingly. + */ + + void get_stat_values() + { + Table_statistics *read_stats= table_share->stats_cb.table_stats; + read_stats->cardinality_is_null= TRUE; + read_stats->cardinality= 0; + if (find_stat()) + { + Field *stat_field= stat_table->field[TABLE_STAT_CARDINALITY]; + if (!stat_field->is_null()) + { + read_stats->cardinality_is_null= FALSE; + read_stats->cardinality= stat_field->val_int(); + } + } + } + +}; + + +/* + An object of the class Column_stat is created to read statistical data + on table columns from the statistical table column_stats, to update + column_stats with such statistical data, or to update columns + of the primary key, or to delete the record by its primary key or + its prefix. + Rows from the statistical table are read and updated always by + primary key. 
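The cardinality round trip described for Table_stat amounts to copying a nullable value in both directions; a minimal standalone sketch, with std::optional standing in for the nullable table_stats.cardinality column and an invented Read_stats struct standing in for the read_stat fields:

#include <cstdint>
#include <optional>

/* Invented stand-ins: 'stored' plays the role of the nullable
   table_stats.cardinality column, Read_stats the role of the
   read_stat.{cardinality, cardinality_is_null} pair. */
struct Read_stats { bool cardinality_is_null; uint64_t cardinality; };

Read_stats get_stat_values(const std::optional<uint64_t> &stored)
{
  Read_stats r= { true, 0 };           /* defaults when the row or value is absent */
  if (stored)
  {
    r.cardinality_is_null= false;
    r.cardinality= *stored;
  }
  return r;
}

std::optional<uint64_t> store_stat_fields(const Read_stats &collected)
{
  if (collected.cardinality_is_null)
    return std::nullopt;               /* stat_field->set_null() */
  return collected.cardinality;        /* stat_field->store(cardinality) */
}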
+*/ + +class Column_stat: public Stat_table +{ + +private: + + Field *db_name_field; /* Field for the column column_stats.db_name */ + Field *table_name_field; /* Field for the column column_stats.table_name */ + Field *column_name_field; /* Field for the column column_stats.column_name */ + + Field *table_field; /* Field from 'table' to read /update statistics on */ + + void common_init_column_stat_table() + { + db_name_field= stat_table->field[COLUMN_STAT_DB_NAME]; + table_name_field= stat_table->field[COLUMN_STAT_TABLE_NAME]; + column_name_field= stat_table->field[COLUMN_STAT_COLUMN_NAME]; + } + + void change_full_table_name(LEX_STRING *db, LEX_STRING *tab) + { + db_name_field->store(db->str, db->length, system_charset_info); + table_name_field->store(tab->str, tab->length, system_charset_info); + } + +public: + + /** + @details + The constructor 'tunes' the private and protected members of the + constructed object for the statistical table column_stats to read/update + statistics on fields of the table 'tab'. The TABLE structure for the table + column_stats must be passed as a value for the parameter 'stat'. + */ + + Column_stat(TABLE *stat, TABLE *tab) :Stat_table(stat, tab) + { + common_init_column_stat_table(); + } + + + /** + @details + The constructor 'tunes' the private and protected members of the + object constructed for the statistical table column_stats for + the future updates/deletes of the record concerning the table 'tab' + from the database 'db'. + */ + + Column_stat(TABLE *stat, LEX_STRING *db, LEX_STRING *tab) + :Stat_table(stat, db, tab) + { + common_init_column_stat_table(); + } + + /** + @brief + Set table name fields for the statistical table column_stats + + @details + The function stores the values of the fields db_name and table_name + of the statistical table column_stats in the record buffer. + */ + + void set_full_table_name() + { + db_name_field->store(db_name->str, db_name->length, system_charset_info); + table_name_field->store(table_name->str, table_name->length, + system_charset_info); + } + + + /** + @brief + Set the key fields for the statistical table column_stats + + @param + col Field for the 'table' column to read/update statistics on + + @details + The function stores the values of the fields db_name, table_name and + column_name in the record buffer for the statistical table column_stats. + These fields comprise the primary key for the table. + It also sets table_field to the passed parameter. + + @note + The function is supposed to be called before any use of the + method find_stat for an object of the Column_stat class. + */ + + void set_key_fields(Field *col) + { + set_full_table_name(); + const char *column_name= col->field_name; + column_name_field->store(column_name, strlen(column_name), + system_charset_info); + table_field= col; + } + + + /** + @brief + Update the table name fields in the current record of stat_table + + @details + The function updates the primary key fields containing database name, + table name, and column name for the last found record in the statistical + table column_stats. 
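What update_column_key_part() achieves can be pictured on a keyed container: the statistics stay the same, only the column_name part of the primary key changes. A standalone illustrative sketch (a single double stands in for the whole row of column statistics):

#include <map>
#include <string>
#include <tuple>

/* (db_name, table_name, column_name), as in the primary key of column_stats */
using ColumnKey= std::tuple<std::string, std::string, std::string>;

/* Returns TRUE if a row was re-keyed for the renamed column. */
bool rename_column_stat(std::map<ColumnKey, double> &column_stats,
                        const ColumnKey &old_key, const std::string &new_column)
{
  auto it= column_stats.find(old_key);
  if (it == column_stats.end())
    return false;                                /* no row for this column */
  ColumnKey new_key= { std::get<0>(old_key), std::get<1>(old_key), new_column };
  double value= it->second;
  column_stats.erase(it);
  column_stats.emplace(new_key, value);          /* same statistics, new key */
  return true;
}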
+ + @retval + FALSE success with the update of the record + @retval + TRUE failure with the update of the record + */ + + bool update_column_key_part(const char *col) + { + store_record_for_update(); + set_full_table_name(); + column_name_field->store(col, strlen(col), system_charset_info); + bool rc= update_record(); + store_record_for_lookup(); + return rc; + } + + + /** + @brief + Store statistical data into statistical fields of column_stats + + @details + This implementation of a purely virtual method sets the value of the + columns 'min_value', 'max_value', 'nulls_ratio', 'avg_length' and + 'avg_frequency' of the stistical table columns_stat according to the + contents of the bitmap write_stat.column_stat_nulls and the values + of the fields min_value, max_value, nulls_ratio, avg_length and + avg_frequency of the structure write_stat from the Field structure + for the field 'table_field'. + The value of the k-th column in the table columns_stat is set to NULL + if the k-th bit in the bitmap 'column_stat_nulls' is set to 1. + + @note + A value from the field min_value/max_value is always converted + into a utf8 string. If the length of the column 'min_value'/'max_value' + is less than the length of the string the string is trimmed to fit the + length of the column. + */ + + void store_stat_fields() + { + char buff[MAX_FIELD_WIDTH]; + String val(buff, sizeof(buff), &my_charset_utf8_bin); + + for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_AVG_FREQUENCY; i++) + { + Field *stat_field= stat_table->field[i]; + if (table_field->collected_stats->is_null(i)) + stat_field->set_null(); + else + { + stat_field->set_notnull(); + switch (i) { + case COLUMN_STAT_MIN_VALUE: + if (table_field->type() == MYSQL_TYPE_BIT) + stat_field->store(table_field->collected_stats->min_value->val_int()); + else + { + table_field->collected_stats->min_value->val_str(&val); + stat_field->store(val.ptr(), val.length(), &my_charset_utf8_bin); + } + break; + case COLUMN_STAT_MAX_VALUE: + if (table_field->type() == MYSQL_TYPE_BIT) + stat_field->store(table_field->collected_stats->max_value->val_int()); + else + { + table_field->collected_stats->max_value->val_str(&val); + stat_field->store(val.ptr(), val.length(), &my_charset_utf8_bin); + } + break; + case COLUMN_STAT_NULLS_RATIO: + stat_field->store(table_field->collected_stats->get_nulls_ratio()); + break; + case COLUMN_STAT_AVG_LENGTH: + stat_field->store(table_field->collected_stats->get_avg_length()); + break; + case COLUMN_STAT_AVG_FREQUENCY: + stat_field->store(table_field->collected_stats->get_avg_frequency()); + break; + } + } + } + } + + + /** + @brief + Read statistical data from statistical fields of column_stats + + @details + This implementation of a purely virtual method first looks for a record + the statistical table column_stats by its primary key set the record + buffer with the help of Column_stat::set_key_fields. Then, if the row is + found, the function reads the values of the columns 'min_value', + 'max_value', 'nulls_ratio', 'avg_length' and 'avg_frequency' of the + table column_stat and sets accordingly the value of the bitmap + read_stat.column_stat_nulls' and the values of the fields min_value, + max_value, nulls_ratio, avg_length and avg_frequency of the structure + read_stat from the Field structure for the field 'table_field'. 
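The is_null()/set_not_null()/set_all_nulls() calls used in this code operate on a small bitmap of per-statistic NULL flags; a self-contained sketch of that pattern (the enum and the bit numbering are illustrative, not the server's exact COLUMN_STAT_* values):

#include <cstdint>

/* Illustrative indexes of the statistical values. */
enum Column_stat_field
{
  STAT_MIN_VALUE= 0,
  STAT_MAX_VALUE,
  STAT_NULLS_RATIO,
  STAT_AVG_LENGTH,
  STAT_AVG_FREQUENCY
};

struct Column_stat_nulls
{
  uint32_t bits= ~0U;                 /* a set bit means "this statistic is NULL" */

  void set_all_nulls() { bits= ~0U; }
  void set_not_null(Column_stat_field f) { bits&= ~(1U << f); }
  bool is_null(Column_stat_field f) const { return (bits & (1U << f)) != 0; }
};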
+ */ + + void get_stat_values() + { + table_field->read_stats->set_all_nulls(); + + if (table_field->read_stats->min_value) + table_field->read_stats->min_value->set_null(); + if (table_field->read_stats->max_value) + table_field->read_stats->max_value->set_null(); + + if (find_stat()) + { + char buff[MAX_FIELD_WIDTH]; + String val(buff, sizeof(buff), &my_charset_utf8_bin); + + for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_AVG_FREQUENCY; i++) + { + Field *stat_field= stat_table->field[i]; + + if (!stat_field->is_null() && + (i > COLUMN_STAT_MAX_VALUE || + (i == COLUMN_STAT_MIN_VALUE && + table_field->read_stats->min_value) || + (i == COLUMN_STAT_MAX_VALUE && + table_field->read_stats->max_value))) + { + table_field->read_stats->set_not_null(i); + + switch (i) { + case COLUMN_STAT_MIN_VALUE: + stat_field->val_str(&val); + table_field->read_stats->min_value->store(val.ptr(), val.length(), + &my_charset_utf8_bin); + break; + case COLUMN_STAT_MAX_VALUE: + stat_field->val_str(&val); + table_field->read_stats->max_value->store(val.ptr(), val.length(), + &my_charset_utf8_bin); + break; + case COLUMN_STAT_NULLS_RATIO: + table_field->read_stats->set_nulls_ratio(stat_field->val_real()); + break; + case COLUMN_STAT_AVG_LENGTH: + table_field->read_stats->set_avg_length(stat_field->val_real()); + break; + case COLUMN_STAT_AVG_FREQUENCY: + table_field->read_stats->set_avg_frequency(stat_field->val_real()); + break; + } + } + } + } + } + +}; + + +/* + An object of the class Index_stat is created to read statistical + data on tables from the statistical table table_stat, to update + index_stats with such statistical data, or to update columns + of the primary key, or to delete the record by its primary key or + its prefix. + Rows from the statistical table are read and updated always by + primary key. +*/ + +class Index_stat: public Stat_table +{ + +private: + + Field *db_name_field; /* Field for the column index_stats.db_name */ + Field *table_name_field; /* Field for the column index_stats.table_name */ + Field *index_name_field; /* Field for the column index_stats.table_name */ + Field *prefix_arity_field; /* Field for the column index_stats.prefix_arity */ + + KEY *table_key_info; /* Info on the index to read/update statistics on */ + uint prefix_arity; /* Number of components of the index prefix of interest */ + + void common_init_index_stat_table() + { + db_name_field= stat_table->field[INDEX_STAT_DB_NAME]; + table_name_field= stat_table->field[INDEX_STAT_TABLE_NAME]; + index_name_field= stat_table->field[INDEX_STAT_INDEX_NAME]; + prefix_arity_field= stat_table->field[INDEX_STAT_PREFIX_ARITY]; + } + + void change_full_table_name(LEX_STRING *db, LEX_STRING *tab) + { + db_name_field->store(db->str, db->length, system_charset_info); + table_name_field->store(tab->str, tab->length, system_charset_info); + } + +public: + + + /** + @details + The constructor 'tunes' the private and protected members of the + constructed object for the statistical table index_stats to read/update + statistics on prefixes of different indexes of the table 'tab'. + The TABLE structure for the table index_stats must be passed as a value + for the parameter 'stat'. + */ + + Index_stat(TABLE *stat, TABLE*tab) :Stat_table(stat, tab) + { + common_init_index_stat_table(); + } + + + /** + @details + The constructor 'tunes' the private and protected members of the + object constructed for the statistical table index_stats for + the future updates/deletes of the record concerning the table 'tab' + from the database 'db'. 
+ */ + + Index_stat(TABLE *stat, LEX_STRING *db, LEX_STRING *tab) + :Stat_table(stat, db, tab) + { + common_init_index_stat_table(); + } + + + /** + @brief + Set table name fields for the statistical table index_stats + + @details + The function stores the values of the fields db_name and table_name + of the statistical table index_stats in the record buffer. + */ + + void set_full_table_name() + { + db_name_field->store(db_name->str, db_name->length, system_charset_info); + table_name_field->store(table_name->str, table_name->length, + system_charset_info); + } + + /** + @brief + Set the key fields of index_stats used to access records for index prefixes + + @param + index_info Info for the index of 'table' to read/update statistics on + + @details + The function sets the values of the fields db_name, table_name and + index_name in the record buffer for the statistical table index_stats. + It also sets table_key_info to the passed parameter. + + @note + The function is supposed to be called before any use of the method + find_next_stat_for_prefix for an object of the Index_stat class. + */ + + void set_index_prefix_key_fields(KEY *index_info) + { + set_full_table_name(); + char *index_name= index_info->name; + index_name_field->store(index_name, strlen(index_name), + system_charset_info); + table_key_info= index_info; + } + + + /** + @brief + Set the key fields for the statistical table index_stats + + @param + index_info Info for the index of 'table' to read/update statistics on + @param + index_prefix_arity Number of components in the index prefix of interest + + @details + The function sets the values of the fields db_name, table_name and + index_name, prefix_arity in the record buffer for the statistical + table index_stats. These fields comprise the primary key for the table. + + @note + The function is supposed to be called before any use of the + method find_stat for an object of the Index_stat class. + */ + + void set_key_fields(KEY *index_info, uint index_prefix_arity) + { + set_index_prefix_key_fields(index_info); + prefix_arity= index_prefix_arity; + prefix_arity_field->store(index_prefix_arity, TRUE); + } + + + /** + @brief + Store statistical data into statistical fields of table index_stats + + @details + This implementation of a purely virtual method sets the value of the + column 'avg_frequency' of the statistical table index_stats according to + the value of write_stat.avg_frequency[Index_stat::prefix_arity] + from the KEY_INFO structure 'table_key_info'. + If the value of write_stat. avg_frequency[Index_stat::prefix_arity] is + equal to 0, the value of the column is set to NULL. + */ + + void store_stat_fields() + { + Field *stat_field= stat_table->field[INDEX_STAT_AVG_FREQUENCY]; + double avg_frequency= + table_key_info->collected_stats->get_avg_frequency(prefix_arity-1); + if (avg_frequency == 0) + stat_field->set_null(); + else + { + stat_field->set_notnull(); + stat_field->store(avg_frequency); + } + } + + + /** + @brief + Read statistical data from statistical fields of index_stats + + @details + This implementation of a purely virtual method first looks for a record the + statistical table index_stats by its primary key set the record buffer with + the help of Index_stat::set_key_fields. If the row is found the function + reads the value of the column 'avg_freguency' of the table index_stat and + sets the value of read_stat.avg_frequency[Index_stat::prefix_arity] + from the KEY_INFO structure 'table_key_info' accordingly. 
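In memory an unknown avg_frequency is represented by 0 while in the table it is represented by SQL NULL, so storing and reading it is a tiny conversion in each direction; an illustrative sketch with std::optional standing in for the nullable column:

#include <optional>

/* Illustrative only: 0 means "avg_frequency unknown" on the in-memory side and
   maps to SQL NULL in index_stats; NULL (or a missing row) reads back as 0. */
std::optional<double> store_avg_frequency(double collected)
{
  if (collected == 0)
    return std::nullopt;                 /* corresponds to stat_field->set_null() */
  return collected;
}

double read_avg_frequency(const std::optional<double> &stored)
{
  return stored ? *stored : 0;           /* NULL column or absent row -> 0 */
}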
If the value of + the column is NULL, read_stat.avg_frequency[Index_stat::prefix_arity] is + set to 0. Otherwise, read_stat.avg_frequency[Index_stat::prefix_arity] is + set to the value of the column. + */ + + void get_stat_values() + { + double avg_frequency= 0; + if(find_stat()) + { + Field *stat_field= stat_table->field[INDEX_STAT_AVG_FREQUENCY]; + if (!stat_field->is_null()) + avg_frequency= stat_field->val_real(); + } + table_key_info->read_stats->set_avg_frequency(prefix_arity-1, avg_frequency); + } + +}; + + +/* + The class Count_distinct_field is a helper class used to calculate + the number of distinct values for a column. The class employs the + Unique class for this purpose. + The class Count_distinct_field is used only by the function + collect_statistics_for_table to calculate the values for + column avg_frequency of the statistical table column_stats. +*/ + +class Count_distinct_field: public Sql_alloc +{ +protected: + + /* Field for which the number of distinct values is to be find out */ + Field *table_field; + Unique *tree; /* The helper object to contain distinct values */ + uint tree_key_length; /* The length of the keys for the elements of 'tree */ + +public: + + /** + @param + field Field for which the number of distinct values is + to be find out + @param + max_heap_table_size The limit for the memory used by the RB tree container + of the constructed Unique object 'tree' + + @details + The constructor sets the values of 'table_field' and 'tree_key_length', + and then calls the 'new' operation to create a Unique object for 'tree'. + The type of 'field' and the value max_heap_table_size of determine the set + of the parameters to be passed to the constructor of the Unique object. + */ + + Count_distinct_field(Field *field, uint max_heap_table_size) + { + qsort_cmp2 compare_key; + void* cmp_arg; + enum enum_field_types f_type= field->type(); + + table_field= field; + tree_key_length= field->pack_length(); + + if ((f_type == MYSQL_TYPE_VARCHAR) || + (!field->binary() && (f_type == MYSQL_TYPE_STRING || + f_type == MYSQL_TYPE_VAR_STRING))) + { + compare_key= (qsort_cmp2) simple_str_key_cmp; + cmp_arg= (void*) field; + } + else + { + cmp_arg= (void*) &tree_key_length; + compare_key= (qsort_cmp2) simple_raw_key_cmp; + } + + tree= new Unique(compare_key, cmp_arg, + tree_key_length, max_heap_table_size); + } + + virtual ~Count_distinct_field() + { + delete tree; + tree= NULL; + } + + /* + @brief + Check whether the Unique object tree has been successfully created + */ + bool exists() + { + return (tree != NULL); + } + + /* + @brief + Add the value of 'field' to the container of the Unique object 'tree' + */ + virtual bool add() + { + return tree->unique_add(table_field->ptr); + } + + /* + @brief + Calculate the number of elements accumulated in the container of 'tree' + */ + ulonglong get_value() + { + ulonglong count; + if (tree->elements == 0) + return (ulonglong) tree->elements_in_tree(); + count= 0; + tree->walk(count_distinct_walk, (void*) &count); + return count; + } +}; + + +/* + The class Count_distinct_field_bit is derived from the class + Count_distinct_field to be used only for fields of the MYSQL_TYPE_BIT type. 
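Conceptually Count_distinct_field feeds every non-NULL value of the scanned column into a duplicate-eliminating container and then reports how many elements it holds; a toy standalone version using std::set (the server uses its Unique class with a custom comparison function and a memory limit instead):

#include <cstdint>
#include <iostream>
#include <optional>
#include <set>
#include <vector>

/* Toy stand-in for Count_distinct_field: add() every non-NULL value seen during
   the scan, get_value() returns the number of distinct values. */
struct Count_distinct
{
  std::set<int64_t> tree;                      /* plays the role of the Unique object */
  bool add(int64_t val) { tree.insert(val); return false; }
  uint64_t get_value() const { return tree.size(); }
};

int main()
{
  std::vector<std::optional<int64_t>> column= {1, 2, std::nullopt, 2, 3, 1};
  Count_distinct counter;
  for (const auto &v : column)
    if (v)                                     /* NULL values are not added */
      counter.add(*v);
  std::cout << "distinct values: " << counter.get_value() << "\n";   /* prints 3 */
}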
+ The class provides a different implementation for the method add +*/ + +class Count_distinct_field_bit: public Count_distinct_field +{ +public: + Count_distinct_field_bit(Field *field, uint max_heap_table_size) + :Count_distinct_field(field, max_heap_table_size) {} + bool add() + { + longlong val= table_field->val_int(); + return tree->unique_add(&val); + } +}; + + +/* + The class Index_prefix_calc is a helper class used to calculate the values + for the column 'avg_frequency' of the statistical table index_stats. + For any table t from the database db and any k-component prefix of the + index i for this table the row from index_stats with the primary key + (db,t,i,k) must contain in the column 'avg_frequency' either NULL or + the number that is the ratio of N and V, where N is the number of index + entries without NULL values in the first k components of the index i, + and V is the number of distinct tuples composed of the first k components + encountered among these index entries. + Currently the objects of this class are used only by the function + collect_statistics_for_index. +*/ + +class Index_prefix_calc: public Sql_alloc +{ + +private: + + /* Table containing index specified by index_info */ + TABLE *index_table; + /* Info for the index i for whose prefix 'avg_frequency' is calculated */ + KEY *index_info; + /* The maximum number of the components in the prefixes of interest */ + uint prefixes; + bool empty; + + /* This structure is created for every k components of the index i */ + class Prefix_calc_state + { + public: + /* + The number of the scanned index entries without nulls + in the first k components + */ + ulonglong entry_count; + /* + The number if the scanned index entries without nulls with + the last encountered k-component prefix + */ + ulonglong prefix_count; + /* The values of the last encountered k-component prefix */ + Cached_item *last_prefix; + }; + + /* + Array of structures used to calculate 'avg_frequency' for different + prefixes of the index i + */ + Prefix_calc_state *calc_state; + +public: + + bool is_single_comp_pk; + + Index_prefix_calc(TABLE *table, KEY *key_info) + : index_table(table), index_info(key_info) + { + uint i; + Prefix_calc_state *state; + uint key_parts= table->actual_n_key_parts(key_info); + empty= TRUE; + prefixes= 0; + + is_single_comp_pk= FALSE; + uint pk= table->s->primary_key; + if (table->key_info - key_info == pk && table->key_info[pk].key_parts == 1) + { + prefixes= 1; + is_single_comp_pk= TRUE; + return; + } + + if ((calc_state= + (Prefix_calc_state *) sql_alloc(sizeof(Prefix_calc_state)*key_parts))) + { + uint keyno= key_info-table->key_info; + for (i= 0, state= calc_state; i < key_parts; i++, state++) + { + /* + Do not consider prefixes containing a component that is only part + of the field. This limitation is set to avoid fetching data when + calculating the values of 'avg_frequency' for prefixes. + */ + if (!key_info->key_part[i].field->part_of_key.is_set(keyno)) + break; + + if (!(state->last_prefix= + new Cached_item_field(key_info->key_part[i].field))) + break; + state->entry_count= state->prefix_count= 0; + prefixes++; + } + } + } + + + /** + @breif + Change the elements of calc_state after reading the next index entry + + @details + This function is to be called at the index scan each time the next + index entry has been read into the record buffer. + For each of the index prefixes the function checks whether nulls + are encountered in any of the k components of the prefix. 
+ If this is not the case the value of calc_state[k-1].entry_count + is incremented by 1. Then the function checks whether the value of + any of these k components has changed. If so, the value of + calc_state[k-1].prefix_count is incremented by 1. + */ + + void add() + { + uint i; + Prefix_calc_state *state; + uint first_changed= prefixes; + for (i= prefixes, state= calc_state+prefixes-1; i; i--, state--) + { + if (state->last_prefix->cmp()) + first_changed= i-1; + } + if (empty) + { + first_changed= 0; + empty= FALSE; + } + for (i= 0, state= calc_state; i < prefixes; i++, state++) + { + if (state->last_prefix->null_value) + break; + if (i >= first_changed) + state->prefix_count++; + state->entry_count++; + } + } + + /** + @brief + Calculate the values of avg_frequency for all prefixes of an index + + @details + This function is to be called after the index scan to count the number + of distinct index prefixes has been done. The function calculates + the value of avg_frequency for the index prefix with k components + as calc_state[k-1].entry_count/calc_state[k-1].prefix_count. + If calc_state[k-1].prefix_count happens to be 0, the value of + avg_frequency[k-1] is set to 0, i.e. is considered as unknown. + */ + + void get_avg_frequency() + { + uint i; + Prefix_calc_state *state; + + if (is_single_comp_pk) + { + index_info->collected_stats->set_avg_frequency(0, 1.0); + return; + } + + for (i= 0, state= calc_state; i < prefixes; i++, state++) + { + if (i < prefixes) + { + double val= state->prefix_count == 0 ? + 0 : (double) state->entry_count / state->prefix_count; + index_info->collected_stats->set_avg_frequency(i, val); + } + } + } +}; + + +/** + @brief + Create fields for min/max values to collect column statistics + + @param + table Table the fields are created for + + @details + The function first allocates record buffers to store min/max values + for 'table's fields. Then for each table field f it creates Field structures + that points to these buffers rather that to the record buffer as the + Field object for f does. The pointers of the created fields are placed + in the collected_stats structure of the Field object for f. + The function allocates the buffers for min/max values in the table + memory. + + @note + The buffers allocated when min/max values are used to read statistics + from the persistent statistical tables differ from those buffers that + are used when statistics on min/max values for column is collected + as they are allocated in different mem_roots. + The same is true for the fields created for min/max values. 
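The counting performed by Index_prefix_calc::add() and the ratios computed by get_avg_frequency() can be reproduced on a toy example; the sketch below is standalone and illustrative (std::optional models a NULL key component, and the entries are assumed to arrive in index order):

#include <cstddef>
#include <iostream>
#include <optional>
#include <vector>

/* One index entry; a NULL key component is modelled by std::nullopt. */
using Entry= std::vector<std::optional<int>>;

int main()
{
  /* Entries of a 2-component index, already in index order. */
  std::vector<Entry> index_entries= {
    Entry{1, 10}, Entry{1, 10}, Entry{1, 20}, Entry{2, std::nullopt}, Entry{2, 30}
  };
  const std::size_t key_parts= 2;

  /* entry_count[k]: scanned entries with no NULL in their first k+1 components;
     prefix_count[k]: distinct (k+1)-component prefixes among those entries. */
  std::vector<unsigned long long> entry_count(key_parts, 0), prefix_count(key_parts, 0);
  Entry last(key_parts);
  bool empty= true;

  for (const Entry &e : index_entries)
  {
    std::size_t first_changed= key_parts;
    for (std::size_t k= 0; k < key_parts; k++)
      if (empty || e[k] != last[k])
      {
        first_changed= k;
        break;
      }
    empty= false;
    last= e;

    for (std::size_t k= 0; k < key_parts; k++)
    {
      if (!e[k])                      /* a NULL component cuts off longer prefixes */
        break;
      entry_count[k]++;
      if (first_changed <= k)
        prefix_count[k]++;            /* the (k+1)-prefix is a new distinct one */
    }
  }

  for (std::size_t k= 0; k < key_parts; k++)
  {
    double avg= prefix_count[k] ? (double) entry_count[k] / prefix_count[k] : 0;
    std::cout << "avg_frequency for a " << k + 1 << "-component prefix: "
              << avg << "\n";
  }
}

For this sample data the sketch prints 2.5 for the 1-component prefix (5 counted entries, 2 distinct values) and about 1.33 for the 2-component prefix (4 counted entries, 3 distinct prefixes).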
+*/ + +static +void create_min_max_statistical_fields_for_table(TABLE *table) +{ + uint rec_buff_length= table->s->rec_buff_length; + + if ((table->collected_stats->min_max_record_buffers= + (uchar *) alloc_root(&table->mem_root, 2*rec_buff_length))) + { + uchar *record= table->collected_stats->min_max_record_buffers; + memset(record, 0, 2*rec_buff_length); + + for (uint i=0; i < 2; i++, record+= rec_buff_length) + { + for (Field **field_ptr= table->field; *field_ptr; field_ptr++) + { + Field *fld; + Field *table_field= *field_ptr; + my_ptrdiff_t diff= record-table->record[0]; + if (!bitmap_is_set(table->read_set, table_field->field_index)) + continue; + if (!(fld= table_field->clone(&table->mem_root, table, diff, TRUE))) + continue; + if (i == 0) + table_field->collected_stats->min_value= fld; + else + table_field->collected_stats->max_value= fld; + } + } + } +} + + +/** + @brief + Create fields for min/max values to read column statistics + + @param + thd Thread handler + @param + table_share Table share the fields are created for + @param + is_safe TRUE <-> at any time only one thread can perform the function + + @details + The function first allocates record buffers to store min/max values + for 'table_share's fields. Then for each field f it creates Field structures + that points to these buffers rather that to the record buffer as the + Field object for f does. The pointers of the created fields are placed + in the read_stats structure of the Field object for f. + The function allocates the buffers for min/max values in the table share + memory. + If the parameter is_safe is TRUE then it is guaranteed that at any given time + only one thread is executed the code of the function. + + @note + The buffers allocated when min/max values are used to collect statistics + from the persistent statistical tables differ from those buffers that + are used when statistics on min/max values for column is read as they + are allocated in different mem_roots. + The same is true for the fields created for min/max values. +*/ + +static +void create_min_max_statistical_fields_for_table_share(THD *thd, + TABLE_SHARE *table_share) +{ + TABLE_STATISTICS_CB *stats_cb= &table_share->stats_cb; + Table_statistics *stats= stats_cb->table_stats; + + if (stats->min_max_record_buffers) + return; + + uint rec_buff_length= table_share->rec_buff_length; + + if ((stats->min_max_record_buffers= + (uchar *) alloc_root(&stats_cb->mem_root, 2*rec_buff_length))) + { + uchar *record= stats->min_max_record_buffers; + memset(record, 0, 2*rec_buff_length); + + for (uint i=0; i < 2; i++, record+= rec_buff_length) + { + for (Field **field_ptr= table_share->field; *field_ptr; field_ptr++) + { + Field *fld; + Field *table_field= *field_ptr; + my_ptrdiff_t diff= record - table_share->default_values; + if (!(fld= table_field->clone(&stats_cb->mem_root, diff))) + continue; + if (i == 0) + table_field->read_stats->min_value= fld; + else + table_field->read_stats->max_value= fld; + } + } + } + +} + + +/** + @brief + Allocate memory for the table's statistical data to be collected + + @param + table Table for which the memory for statistical data is allocated + + @note + The function allocates the memory for the statistical data on 'table' with + the intention to collect the data there. The memory is allocated for + the statistics on the table, on the table's columns, and on the table's + indexes. The memory is allocated in the table's mem_root. 
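The min/max field cloning done by these functions boils down to reusing a field descriptor while pointing it at the same offset inside a separately allocated record-sized buffer; a toy sketch (Toy_field and clone_into are invented for the illustration):

#include <cstddef>
#include <iostream>
#include <vector>

/* Toy stand-in for a Field object: a pointer to the column's slot in some
   record buffer plus the column length. */
struct Toy_field
{
  unsigned char *ptr;
  std::size_t length;
};

/* "Clone" a field so that it addresses the same column slot inside another
   record-sized buffer, mirroring Field::clone() + move_field_offset(). */
Toy_field clone_into(const Toy_field &f, unsigned char *old_base,
                     unsigned char *new_base)
{
  Toy_field copy= f;
  copy.ptr= new_base + (f.ptr - old_base);     /* same offset, different buffer */
  return copy;
}

int main()
{
  std::vector<unsigned char> record(16, 0);    /* table->record[0] */
  std::vector<unsigned char> min_buf(16, 0);   /* half of min_max_record_buffers */

  Toy_field col= { record.data() + 4, 4 };     /* a column stored at offset 4 */
  Toy_field min_value= clone_into(col, record.data(), min_buf.data());

  min_value.ptr[0]= 7;                         /* written through the clone ...  */
  std::cout << (int) min_buf[4] << "\n";       /* ... lands in the min buffer: 7 */
}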
+ + @retval + 0 If the memory for all statistical data has been successfully allocated + @retval + 1 Otherwise + + @note + Each thread allocates its own memory to collect statistics on the table + It allows us, for example, to collect statistics on the different indexes + of the same table in parallel. +*/ + +int alloc_statistics_for_table(THD* thd, TABLE *table) +{ + Field **field_ptr; + uint fields; + + DBUG_ENTER("alloc_statistics_for_table"); + + + Table_statistics *table_stats= + (Table_statistics *) alloc_root(&table->mem_root, + sizeof(Table_statistics)); + + fields= table->s->fields ; + Column_statistics_collected *column_stats= + (Column_statistics_collected *) alloc_root(&table->mem_root, + sizeof(Column_statistics_collected) * + (fields+1)); + + uint keys= table->s->keys; + Index_statistics *index_stats= + (Index_statistics *) alloc_root(&table->mem_root, + sizeof(Index_statistics) * keys); + + uint key_parts= table->s->ext_key_parts; + ulong *idx_avg_frequency= (ulong*) alloc_root(&table->mem_root, + sizeof(ulong) * key_parts); + + if (!table_stats || !column_stats || !index_stats || !idx_avg_frequency) + DBUG_RETURN(1); + + table->collected_stats= table_stats; + table_stats->column_stats= column_stats; + table_stats->index_stats= index_stats; + table_stats->idx_avg_frequency= idx_avg_frequency; + + memset(column_stats, 0, sizeof(Column_statistics) * (fields+1)); + + for (field_ptr= table->field; *field_ptr; field_ptr++, column_stats++) + { + (*field_ptr)->collected_stats= column_stats; + (*field_ptr)->collected_stats->max_value= NULL; + (*field_ptr)->collected_stats->min_value= NULL; + } + + memset(idx_avg_frequency, 0, sizeof(ulong) * key_parts); + + KEY *key_info, *end; + for (key_info= table->key_info, end= key_info + table->s->keys; + key_info < end; + key_info++, index_stats++) + { + key_info->collected_stats= index_stats; + key_info->collected_stats->init_avg_frequency(idx_avg_frequency); + idx_avg_frequency+= key_info->ext_key_parts; + } + + create_min_max_statistical_fields_for_table(table); + + DBUG_RETURN(0); +} + + +/** + @brief + Check whether any persistent statistics for the processed command is needed + + @param + thd The thread handle + + @details + The function checks whether any persitent statistics for the processed + command is needed to be read. + + @retval + TRUE statistics is needed to be read + @retval + FALSE Otherwise +*/ + +static +inline bool statistics_for_command_is_needed(THD *thd) +{ + if (thd->bootstrap || thd->variables.use_stat_tables == NEVER) + return FALSE; + + switch(thd->lex->sql_command) { + case SQLCOM_SELECT: + case SQLCOM_INSERT: + case SQLCOM_INSERT_SELECT: + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + case SQLCOM_REPLACE: + case SQLCOM_REPLACE_SELECT: + break; + default: + return FALSE; + } + + return TRUE; +} + + +/** + @brief + Allocate memory for the statistical data used by a table share + + @param + thd Thread handler + @param + table_share Table share for which the memory for statistical data is allocated + @param + is_safe TRUE <-> at any time only one thread can perform the function + + @note + The function allocates the memory for the statistical data on a table in the + table's share memory with the intention to read the statistics there from + the system persistent statistical tables mysql.table_stat, mysql.column_stats, + mysql.index_stats. The memory is allocated for the statistics on the table, + on the tables's columns, and on the table's indexes. 
The memory is allocated + in the table_share's mem_root. + If the parameter is_safe is TRUE then it is guaranteed that at any given time + only one thread is executed the code of the function. + + @retval + 0 If the memory for all statistical data has been successfully allocated + @retval + 1 Otherwise + + @note + The situation when more than one thread try to allocate memory for + statistical data is rare. It happens under the following scenario: + 1. One thread executes a query over table t with the system variable + 'use_stat_tables' set to 'never'. + 2. After this the second thread sets 'use_stat_tables' to 'preferably' + and executes a query over table t. + 3. Simultaneously the third thread sets 'use_stat_tables' to 'preferably' + and executes a query over table t. + Here the second and the third threads try to allocate the memory for + statistical data at the same time. The precautions are taken to + guarantee the correctness of the allocation. + + @note + Currently the function always is called with the parameter is_safe set + to FALSE. + +*/ + +int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *table_share, + bool is_safe) +{ + + Field **field_ptr; + KEY *key_info, *end; + TABLE_STATISTICS_CB *stats_cb= &table_share->stats_cb; + + DBUG_ENTER("alloc_statistics_for_table_share"); + + DEBUG_SYNC(thd, "statistics_mem_alloc_start1"); + DEBUG_SYNC(thd, "statistics_mem_alloc_start2"); + + if (!statistics_for_command_is_needed(thd)) + DBUG_RETURN(1); + + if (!is_safe) + mysql_mutex_lock(&table_share->LOCK_ha_data); + + if (stats_cb->stats_can_be_read) + { + if (!is_safe) + mysql_mutex_unlock(&table_share->LOCK_ha_data); + DBUG_RETURN(0); + } + + Table_statistics *table_stats= stats_cb->table_stats; + if (!table_stats) + { + table_stats= (Table_statistics *) alloc_root(&stats_cb->mem_root, + sizeof(Table_statistics)); + if (!table_stats) + { + if (!is_safe) + mysql_mutex_unlock(&table_share->LOCK_ha_data); + DBUG_RETURN(1); + } + memset(table_stats, 0, sizeof(Table_statistics)); + stats_cb->table_stats= table_stats; + } + + uint fields= table_share->fields; + Column_statistics *column_stats= table_stats->column_stats; + if (!column_stats) + { + column_stats= (Column_statistics *) alloc_root(&stats_cb->mem_root, + sizeof(Column_statistics) * + (fields+1)); + if (column_stats) + { + memset(column_stats, 0, sizeof(Column_statistics) * (fields+1)); + table_stats->column_stats= column_stats; + for (field_ptr= table_share->field; + *field_ptr; + field_ptr++, column_stats++) + { + (*field_ptr)->read_stats= column_stats; + (*field_ptr)->read_stats->min_value= NULL; + (*field_ptr)->read_stats->max_value= NULL; + } + create_min_max_statistical_fields_for_table_share(thd, table_share); + } + } + + uint keys= table_share->keys; + Index_statistics *index_stats= table_stats->index_stats; + if (!index_stats) + { + index_stats= (Index_statistics *) alloc_root(&stats_cb->mem_root, + sizeof(Index_statistics) * + keys); + if (index_stats) + { + table_stats->index_stats= index_stats; + for (key_info= table_share->key_info, end= key_info + keys; + key_info < end; + key_info++, index_stats++) + { + key_info->read_stats= index_stats; + } + } + } + + uint key_parts= table_share->ext_key_parts; + ulong *idx_avg_frequency= table_stats->idx_avg_frequency; + if (!idx_avg_frequency) + { + idx_avg_frequency= (ulong*) alloc_root(&stats_cb->mem_root, + sizeof(ulong) * key_parts); + if (idx_avg_frequency) + { + memset(idx_avg_frequency, 0, sizeof(ulong) * key_parts); + table_stats->idx_avg_frequency= 
idx_avg_frequency; + for (key_info= table_share->key_info, end= key_info + keys; + key_info < end; + key_info++) + { + key_info->read_stats->init_avg_frequency(idx_avg_frequency); + idx_avg_frequency+= key_info->ext_key_parts; + } + } + } + + if (column_stats && index_stats && idx_avg_frequency) + stats_cb->stats_can_be_read= TRUE; + + if (!is_safe) + mysql_mutex_unlock(&table_share->LOCK_ha_data); + + + DBUG_RETURN(0); +} + + +/** + @brief + Initialize the aggregation fields to collect statistics on a column + + @param + thd Thread handler + @param + table_field Column to collect statistics for +*/ + +inline +void Column_statistics_collected::init(THD *thd, Field *table_field) +{ + uint max_heap_table_size= thd->variables.max_heap_table_size; + TABLE *table= table_field->table; + uint pk= table->s->primary_key; + + is_single_pk_col= FALSE; + + if (pk != MAX_KEY && table->key_info[pk].key_parts == 1 && + table->key_info[pk].key_part[0].fieldnr == table_field->field_index + 1) + is_single_pk_col= TRUE; + + column= table_field; + + set_all_nulls(); + + nulls= 0; + column_total_length= 0; + if (is_single_pk_col) + count_distinct= NULL; + if (table_field->flags & BLOB_FLAG) + count_distinct= NULL; + else + { + count_distinct= + table_field->type() == MYSQL_TYPE_BIT ? + new Count_distinct_field_bit(table_field, max_heap_table_size) : + new Count_distinct_field(table_field, max_heap_table_size); + } + if (count_distinct && !count_distinct->exists()) + count_distinct= NULL; +} + + +/** + @brief + Perform aggregation for a row when collecting statistics on a column + + @param + rowno The order number of the row +*/ + +inline +void Column_statistics_collected::add(ha_rows rowno) +{ + + if (column->is_null()) + nulls++; + else + { + column_total_length+= column->value_length(); + if (min_value && column->update_min(min_value, rowno == nulls)) + set_not_null(COLUMN_STAT_MIN_VALUE); + if (max_value && column->update_max(max_value, rowno == nulls)) + set_not_null(COLUMN_STAT_MAX_VALUE); + if (count_distinct) + count_distinct->add(); + } +} + + +/** + @brief + Get the results of aggregation when collecting the statistics on a column + + @param + rows The total number of rows in the table +*/ + +inline +void Column_statistics_collected::finish(ha_rows rows) +{ + double val; + + if (rows) + { + val= (double) nulls / rows; + set_nulls_ratio(val); + set_not_null(COLUMN_STAT_NULLS_RATIO); + } + if (rows - nulls) + { + val= (double) column_total_length / (rows - nulls); + set_avg_length(val); + set_not_null(COLUMN_STAT_AVG_LENGTH); + } + if (count_distinct) + { + ulonglong distincts= count_distinct->get_value(); + if (distincts) + { + val= (double) (rows - nulls) / distincts; + set_avg_frequency(val); + set_not_null(COLUMN_STAT_AVG_FREQUENCY); + } + delete count_distinct; + count_distinct= NULL; + } + else if (is_single_pk_col) + { + val= 1.0; + set_avg_frequency(val); + set_not_null(COLUMN_STAT_AVG_FREQUENCY); + } +} + + +/** + @brief + Clean up auxiliary structures used for aggregation +*/ + +inline +void Column_statistics_collected::cleanup() +{ + if (count_distinct) + { + delete count_distinct; + count_distinct= NULL; + } +} + + +/** + @brief + Collect statistical data on an index + + @param + table The table the index belongs to + index The number of this index in the table + + @details + The function collects the value of 'avg_frequency' for the prefixes + on an index from 'table'. The index is specified by its number. 
+ If the scan is successful the calculated statistics is saved in the + elements of the array write_stat.avg_frequency of the KEY_INFO structure + for the index. The statistics for the prefix with k components is saved + in the element number k-1. + + @retval + 0 If the statistics has been successfully collected + @retval + 1 Otherwise + + @note + The function collects statistics for the index prefixes for one index + scan during which no data is fetched from the table records. That's why + statistical data for prefixes that contain part of a field is not + collected. + The function employs an object of the helper class Index_prefix_calc to + count for each index prefix the number of index entries without nulls and + the number of distinct entries among them. + +*/ + +static +int collect_statistics_for_index(THD *thd, TABLE *table, uint index) +{ + int rc= 0; + KEY *key_info= &table->key_info[index]; + ha_rows rows= 0; + Index_prefix_calc index_prefix_calc(table, key_info); + DBUG_ENTER("collect_statistics_for_index"); + + DEBUG_SYNC(table->in_use, "statistics_collection_start1"); + DEBUG_SYNC(table->in_use, "statistics_collection_start2"); + + if (index_prefix_calc.is_single_comp_pk) + { + index_prefix_calc.get_avg_frequency(); + DBUG_RETURN(rc); + } + + table->key_read= 1; + table->file->extra(HA_EXTRA_KEYREAD); + + table->file->ha_index_init(index, TRUE); + rc= table->file->ha_index_first(table->record[0]); + while (rc != HA_ERR_END_OF_FILE) + { + if (thd->killed) + break; + + if (rc) + break; + rows++; + index_prefix_calc.add(); + rc= table->file->ha_index_next(table->record[0]); + } + table->key_read= 0; + table->file->ha_index_end(); + + rc= (rc == HA_ERR_END_OF_FILE && !thd->killed) ? 0 : 1; + + if (!rc) + index_prefix_calc.get_avg_frequency(); + + DBUG_RETURN(rc); +} + + +/** + @brief + Collect statistical data for a table + + @param + thd The thread handle + @param + table The table to collect statistics on + + @details + The function collects data for various statistical characteristics on + the table 'table'. These data is saved in the internal fields that could + be reached from 'table'. The data is prepared to be saved in the persistent + statistical table by the function update_statistics_for_table. + The collected statistical values are not placed in the same fields that + keep the statistical data used by the optimizer. Therefore, at any time, + there is no collision between the statistics being collected and the one + used by the optimizer to look for optimal query execution plans for other + clients. + + @retval + 0 If the statistics has been successfully collected + @retval + 1 Otherwise + + @note + The function first collects statistical data for statistical characteristics + to be saved in the statistical tables table_stat and column_stats. To do this + it performs a full table scan of 'table'. At this scan the function collects + statistics on each column of the table and count the total number of the + scanned rows. To calculate the value of 'avg_frequency' for a column the + function constructs an object of the helper class Count_distinct_field + (or its derivation). Currently this class cannot count the number of + distinct values for blob columns. So the value of 'avg_frequency' for + blob columns is always null. + After the full table scan the function calls collect_statistics_for_index + for each table index. The latter performs full index scan for each index. 
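  @note
  What such an index scan computes can be reproduced with a small standalone
  sketch (simplified: two integer key parts, no NULLs, entries listed in
  index order; the real code uses Index_prefix_calc and Cached_item):

    #include <cstdio>
    #include <utility>
    #include <vector>

    int main()
    {
      std::vector<std::pair<int,int> > entries= {{1,1},{1,2},{1,2},{2,1}};
      unsigned long entry_count[2]=  {0,0};     // index entries per prefix
      unsigned long prefix_count[2]= {0,0};     // distinct prefixes seen so far
      bool empty= true;
      std::pair<int,int> last= {0,0};

      for (std::pair<int,int> &e : entries)
      {
        unsigned first_changed= 2;              // shortest prefix that changed
        if (e.first != last.first)
          first_changed= 0;
        else if (e.second != last.second)
          first_changed= 1;
        if (empty) { first_changed= 0; empty= false; }
        for (unsigned i= 0; i < 2; i++)
        {
          if (i >= first_changed)
            prefix_count[i]++;
          entry_count[i]++;
        }
        last= e;
      }
      for (unsigned i= 0; i < 2; i++)           // avg_frequency = entries/prefixes
        std::printf("prefix %u: %.2f\n", i+1,
                    (double) entry_count[i] / prefix_count[i]);
      return 0;
    }

  This prints 2.00 for the one-component prefix (4 entries, 2 distinct
  values) and 1.33 for the full key (4 entries, 3 distinct pairs), which is
  exactly the entry_count/prefix_count ratio stored by
  Index_prefix_calc::get_avg_frequency().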
+ + @note + Currently the statistical data is collected indiscriminately for all + columns/indexes of 'table', for all statistical characteristics. + TODO. Collect only specified statistical characteristics for specified + columns/indexes. + + @note + Currently the process of collecting statistical data is not optimized. + For example, 'avg_frequency' for a column could be copied from the + 'avg_frequency' collected for an index if this column is used as the + first component of the index. Min and min values for this column could + be extracted from the index as well. +*/ + +int collect_statistics_for_table(THD *thd, TABLE *table) +{ + int rc; + Field **field_ptr; + Field *table_field; + ha_rows rows= 0; + handler *file=table->file; + + DBUG_ENTER("collect_statistics_for_table"); + + table->collected_stats->cardinality_is_null= TRUE; + table->collected_stats->cardinality= 0; + + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + table_field= *field_ptr; + if (!bitmap_is_set(table->read_set, table_field->field_index)) + continue; + table_field->collected_stats->init(thd, table_field); + } + + /* Perform a full table scan to collect statistics on 'table's columns */ + if (!(rc= file->ha_rnd_init(TRUE))) + { + DEBUG_SYNC(table->in_use, "statistics_collection_start"); + + while ((rc= file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE) + { + if (thd->killed) + break; + + if (rc) + { + if (rc == HA_ERR_RECORD_DELETED) + continue; + break; + } + + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + table_field= *field_ptr; + if (!bitmap_is_set(table->read_set, table_field->field_index)) + continue; + table_field->collected_stats->add(rows); + } + rows++; + } + file->ha_rnd_end(); + } + rc= (rc == HA_ERR_END_OF_FILE && !thd->killed) ? 0 : 1; + + /* + Calculate values for all statistical characteristics on columns and + and for each field f of 'table' save them in the write_stat structure + from the Field object for f. + */ + if (!rc) + { + table->collected_stats->cardinality_is_null= FALSE; + table->collected_stats->cardinality= rows; + } + + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + table_field= *field_ptr; + if (!bitmap_is_set(table->read_set, table_field->field_index)) + continue; + if (!rc) + table_field->collected_stats->finish(rows); + else + table_field->collected_stats->cleanup(); + } + + if (!rc) + { + uint key; + key_map::Iterator it(table->keys_in_use_for_query); + + MY_BITMAP *save_read_set= table->read_set; + table->read_set= &table->tmp_set; + bitmap_set_all(table->read_set); + + /* Collect statistics for indexes */ + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + if ((rc= collect_statistics_for_index(thd, table, key))) + break; + } + + table->read_set= save_read_set; + } + + DBUG_RETURN(rc); +} + + +/** + @brief + Update statistics for a table in the persistent statistical tables + + @param + thd The thread handle + @param + table The table to collect statistics on + + @details + For each statistical table st the function looks for the rows from this + table that contain statistical data on 'table'. If rows with given + statistical characteristics exist they are updated with the new statistical + values taken from internal structures for 'table'. Otherwise new rows + with these statistical characteristics are added into st. + It is assumed that values stored in the statistical tables are found and + saved by the function collect_statistics_for_table. 
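  @note
  As a concrete illustration (hypothetical names): for a table db1.t1 with a
  single-column primary key and a two-column index idx1(a,b), one ANALYZE
  run updates or inserts one table_stats row (db1, t1, cardinality), one
  column_stats row per column in the read set, and one index_stats row per
  index prefix: (db1, t1, PRIMARY, 1), (db1, t1, idx1, 1), (db1, t1, idx1, 2)
  and, when extended keys make actual_n_key_parts() count the appended
  primary key column, (db1, t1, idx1, 3).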
+ + @retval + 0 If all statistical tables has been successfully updated + @retval + 1 Otherwise + + @note + The function is called when executing the ANALYZE actions for 'table'. + The function first unlocks the opened table the statistics on which has + been collected, but does not closes it, so all collected statistical data + remains in internal structures for 'table'. Then the function opens the + statistical tables and writes the statistical data for 'table'into them. + It is not allowed just to open statistical tables for writing when some + other tables are locked for reading. + After the statistical tables have been opened they are updated one by one + with the new statistics on 'table'. Objects of the helper classes + Table_stat, Column_stat and Index_stat are employed for this. + After having been updated the statistical system tables are closed. +*/ + +int update_statistics_for_table(THD *thd, TABLE *table) +{ + TABLE_LIST tables[STATISTICS_TABLES]; + Open_tables_backup open_tables_backup; + uint i; + int err; + enum_binlog_format save_binlog_format; + int rc= 0; + TABLE *stat_table; + + DBUG_ENTER("update_statistics_for_table"); + + DEBUG_SYNC(thd, "statistics_update_start"); + + if (open_stat_tables(thd, tables, &open_tables_backup, TRUE)) + { + thd->clear_error(); + DBUG_RETURN(rc); + } + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + /* Update the statistical table table_stats */ + stat_table= tables[TABLE_STAT].table; + Table_stat table_stat(stat_table, table); + restore_record(stat_table, s->default_values); + table_stat.set_key_fields(); + err= table_stat.update_stat(); + if (err) + rc= 1; + + /* Update the statistical table colum_stats */ + stat_table= tables[COLUMN_STAT].table; + Column_stat column_stat(stat_table, table); + for (Field **field_ptr= table->field; *field_ptr; field_ptr++) + { + Field *table_field= *field_ptr; + if (!bitmap_is_set(table->read_set, table_field->field_index)) + continue; + restore_record(stat_table, s->default_values); + column_stat.set_key_fields(table_field); + err= column_stat.update_stat(); + if (err && !rc) + rc= 1; + } + + /* Update the statistical table index_stats */ + stat_table= tables[INDEX_STAT].table; + uint key; + key_map::Iterator it(table->keys_in_use_for_query); + Index_stat index_stat(stat_table, table); + + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + KEY *key_info= table->key_info+key; + uint key_parts= table->actual_n_key_parts(key_info); + for (i= 0; i < key_parts; i++) + { + restore_record(stat_table, s->default_values); + index_stat.set_key_fields(key_info, i+1); + err= index_stat.update_stat(); + if (err && !rc) + rc= 1; + } + } + + thd->restore_stmt_binlog_format(save_binlog_format); + + close_system_tables(thd, &open_tables_backup); + + DBUG_RETURN(rc); +} + + +/** + @brief + Read statistics for a table from the persistent statistical tables + + @param + thd The thread handle + @param + table The table to read statistics on + @param + stat_tables The array of TABLE_LIST objects for statistical tables + + @details + For each statistical table the function looks for the rows from this + table that contain statistical data on 'table'. If such rows is found + the data from statistical columns of it is read into the appropriate + fields of internal structures for 'table'. Later at the query processing + this data are supposed to be used by the optimizer. + The parameter stat_tables should point to an array of TABLE_LIST + objects for all statistical tables linked into a list. 
All statistical + tables are supposed to be opened. + The function is called by read_statistics_for_tables_if_needed(). + + @retval + 0 If data has been successfully read for the table + @retval + 1 Otherwise + + @note + Objects of the helper classes Table_stat, Column_stat and Index_stat + are employed to read statistical data from the statistical tables. + now. +*/ + +static +int read_statistics_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables) +{ + uint i; + TABLE *stat_table; + Field *table_field; + Field **field_ptr; + KEY *key_info, *key_info_end; + TABLE_SHARE *table_share= table->s; + + DBUG_ENTER("read_statistics_for_table"); + + /* Read statistics from the statistical table table_stats */ + stat_table= stat_tables[TABLE_STAT].table; + Table_stat table_stat(stat_table, table); + table_stat.set_key_fields(); + table_stat.get_stat_values(); + + /* Read statistics from the statistical table column_stats */ + stat_table= stat_tables[COLUMN_STAT].table; + Column_stat column_stat(stat_table, table); + for (field_ptr= table_share->field; *field_ptr; field_ptr++) + { + table_field= *field_ptr; + column_stat.set_key_fields(table_field); + column_stat.get_stat_values(); + } + + /* Read statistics from the statistical table index_stats */ + Table_statistics *read_stats= table_share->stats_cb.table_stats; + stat_table= stat_tables[INDEX_STAT].table; + Index_stat index_stat(stat_table, table); + for (key_info= table_share->key_info, + key_info_end= key_info + table_share->keys; + key_info < key_info_end; key_info++) + { + uint key_parts= key_info->ext_key_parts; + for (i= 0; i < key_parts; i++) + { + index_stat.set_key_fields(key_info, i+1); + index_stat.get_stat_values(); + } + + key_part_map ext_key_part_map= key_info->ext_key_part_map; + if (key_info->key_parts != key_info->ext_key_parts && + key_info->read_stats->get_avg_frequency(key_info->key_parts) == 0) + { + KEY *pk_key_info= table_share->key_info + table_share->primary_key; + uint k= key_info->key_parts; + uint pk_parts= pk_key_info->key_parts; + ha_rows n_rows= read_stats->cardinality; + double k_dist= n_rows / key_info->read_stats->get_avg_frequency(k-1); + uint m= 0; + for (uint j= 0; j < pk_parts; j++) + { + if (!(ext_key_part_map & 1 << j)) + { + for (uint l= k; l < k + m; l++) + { + double avg_frequency= + pk_key_info->read_stats->get_avg_frequency(j-1); + set_if_smaller(avg_frequency, 1); + double val= pk_key_info->read_stats->get_avg_frequency(j) / + avg_frequency; + key_info->read_stats->set_avg_frequency (l, val); + } + } + else + { + double avg_frequency= pk_key_info->read_stats->get_avg_frequency(j); + key_info->read_stats->set_avg_frequency(k + m, avg_frequency); + m++; + } + } + for (uint l= k; l < k + m; l++) + { + double avg_frequency= key_info->read_stats->get_avg_frequency(l); + if (avg_frequency == 0 || read_stats->cardinality_is_null) + avg_frequency= 1; + else if (avg_frequency > 1) + { + avg_frequency/= k_dist; + set_if_bigger(avg_frequency, 1); + } + key_info->read_stats->set_avg_frequency(l, avg_frequency); + } + } + } + + DBUG_RETURN(0); +} + + +/** + @brief + Check whether any statistics is to be read for tables from a table list + + @param + thd The thread handle + @param + tables The tables list for whose tables the check is to be done + + @details + The function checks whether for any of the tables opened and locked for + a statement statistics from statistical tables is needed to be read. 
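  @note
  A small worked example (hypothetical numbers) for the extension handling
  in read_statistics_for_table() above: take a table with cardinality 1000,
  a primary key on column id and a secondary index i1(a) for which only the
  one-component statistic was stored, avg_frequency(a) = 10. The extended
  key is (a,id), so key_parts = 1, ext_key_parts = 2 and the second slot
  reads as 0. The code estimates the number of distinct values of a as
  k_dist = 1000 / 10 = 100, copies the primary key's avg_frequency for id
  (1, since id is unique) into the missing slot, and the final adjustment
  pass leaves it at 1: the (a,id) prefix is treated as unique, as expected.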
+ + @retval + TRUE statistics for any of the tables is needed to be read + @retval + FALSE Otherwise +*/ + +static +bool statistics_for_tables_is_needed(THD *thd, TABLE_LIST *tables) +{ + if (!tables) + return FALSE; + + if (!statistics_for_command_is_needed(thd)) + return FALSE; + + /* + Do not read statistics for any query over non-user tables. + If the query references some statistical tables, but not all + of them, reading the statistics may lead to a deadlock + */ + for (TABLE_LIST *tl= tables; tl; tl= tl->next_global) + { + if (!tl->is_view_or_derived() && tl->table) + { + TABLE_SHARE *table_share= tl->table->s; + if (table_share && + (table_share->table_category != TABLE_CATEGORY_USER || + table_share->tmp_table != NO_TMP_TABLE)) + return FALSE; + } + } + + for (TABLE_LIST *tl= tables; tl; tl= tl->next_global) + { + if (!tl->is_view_or_derived() && tl->table) + { + TABLE_SHARE *table_share= tl->table->s; + if (table_share && + table_share->stats_cb.stats_can_be_read && + !table_share->stats_cb.stats_is_read) + return TRUE; + } + } + + return FALSE; +} + + +/** + @brief + Read statistics for tables from a table list if it is needed + + @param + thd The thread handle + @param + tables The tables list for whose tables to read statistics + + @details + The function first checks whether for any of the tables opened and locked + for a statement statistics from statistical tables is needed to be read. + Then, if so, it opens system statistical tables for read and reads + the statistical data from them for those tables from the list for which it + makes sense. Then the function closes system statistical tables. + + @retval + 0 Statistics for tables was successfully read + @retval + 1 Otherwise +*/ + +int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables) +{ + TABLE_LIST stat_tables[STATISTICS_TABLES]; + Open_tables_backup open_tables_backup; + + DBUG_ENTER("read_statistics_for_table_if_needed"); + + DEBUG_SYNC(thd, "statistics_read_start"); + + if (!statistics_for_tables_is_needed(thd, tables)) + DBUG_RETURN(0); + + if (open_stat_tables(thd, stat_tables, &open_tables_backup, FALSE)) + { + thd->clear_error(); + DBUG_RETURN(1); + } + + for (TABLE_LIST *tl= tables; tl; tl= tl->next_global) + { + if (!tl->is_view_or_derived() && tl->table) + { + TABLE_SHARE *table_share= tl->table->s; + if (table_share && + table_share->stats_cb.stats_can_be_read && + !table_share->stats_cb.stats_is_read) + { + (void) read_statistics_for_table(thd, tl->table, stat_tables); + table_share->stats_cb.stats_is_read= TRUE; + } + } + } + + close_system_tables(thd, &open_tables_backup); + + DBUG_RETURN(0); +} + + +/** + @brief + Delete statistics on a table from all statistical tables + + @param + thd The thread handle + @param + db The name of the database the table belongs to + @param + tab The name of the table whose statistics is to be deleted + + @details + The function delete statistics on the table called 'tab' of the database + 'db' from all statistical tables: table_stats, column_stats, index_stats. + + @retval + 0 If all deletions are successful + @retval + 1 Otherwise + + @note + The function is called when executing the statement DROP TABLE 'tab'. 
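  @note
  For example, DROP TABLE db1.t1 reaches this function with db='db1' and
  tab='t1'; find_next_stat_for_prefix(2) below walks every row whose first
  two key columns (db_name, table_name) equal ('db1','t1'), so all
  index_stats rows of the table's indexes and all column_stats rows of its
  columns are deleted one by one, followed by the single matching
  table_stats row.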
+*/ + +int delete_statistics_for_table(THD *thd, LEX_STRING *db, LEX_STRING *tab) +{ + int err; + enum_binlog_format save_binlog_format; + TABLE *stat_table; + TABLE_LIST tables[STATISTICS_TABLES]; + Open_tables_backup open_tables_backup; + int rc= 0; + + DBUG_ENTER("delete_statistics_for_table"); + + if (open_stat_tables(thd, tables, &open_tables_backup, TRUE)) + { + thd->clear_error(); + DBUG_RETURN(rc); + } + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + /* Delete statistics on table from the statistical table index_stats */ + stat_table= tables[INDEX_STAT].table; + Index_stat index_stat(stat_table, db, tab); + index_stat.set_full_table_name(); + while (index_stat.find_next_stat_for_prefix(2)) + { + err= index_stat.delete_stat(); + if (err & !rc) + rc= 1; + } + + /* Delete statistics on table from the statistical table column_stats */ + stat_table= tables[COLUMN_STAT].table; + Column_stat column_stat(stat_table, db, tab); + column_stat.set_full_table_name(); + while (column_stat.find_next_stat_for_prefix(2)) + { + err= column_stat.delete_stat(); + if (err & !rc) + rc= 1; + } + + /* Delete statistics on table from the statistical table table_stats */ + stat_table= tables[TABLE_STAT].table; + Table_stat table_stat(stat_table, db, tab); + table_stat.set_key_fields(); + if (table_stat.find_stat()) + { + err= table_stat.delete_stat(); + if (err & !rc) + rc= 1; + } + + thd->restore_stmt_binlog_format(save_binlog_format); + + close_system_tables(thd, &open_tables_backup); + + DBUG_RETURN(rc); +} + + +/** + @brief + Delete statistics on a column of the specified table + + @param + thd The thread handle + @param + tab The table the column belongs to + @param + col The field of the column whose statistics is to be deleted + + @details + The function delete statistics on the column 'col' belonging to the table + 'tab' from the statistical table column_stats. + + @retval + 0 If the deletion is successful + @retval + 1 Otherwise + + @note + The function is called when dropping a table column or when changing + the definition of this column. +*/ + +int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col) +{ + int err; + enum_binlog_format save_binlog_format; + TABLE *stat_table; + TABLE_LIST tables; + Open_tables_backup open_tables_backup; + int rc= 0; + + DBUG_ENTER("delete_statistics_for_column"); + + if (open_single_stat_table(thd, &tables, &stat_table_name[1], + &open_tables_backup, TRUE)) + { + thd->clear_error(); + DBUG_RETURN(rc); + } + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + stat_table= tables.table; + Column_stat column_stat(stat_table, tab); + column_stat.set_key_fields(col); + if (column_stat.find_stat()) + { + err= column_stat.delete_stat(); + if (err) + rc= 1; + } + + thd->restore_stmt_binlog_format(save_binlog_format); + + close_system_tables(thd, &open_tables_backup); + + DBUG_RETURN(rc); +} + + +/** + @brief + Delete statistics on an index of the specified table + + @param + thd The thread handle + @param + tab The table the index belongs to + @param + key_info The descriptor of the index whose statistics is to be deleted + @param + ext_prefixes_only Delete statistics only on the index prefixes extended by + the components of the primary key + + @details + The function delete statistics on the index specified by 'key_info' + defined on the table 'tab' from the statistical table index_stats. 
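  @note
  For instance (hypothetical schema): with a primary key on id and an index
  i1(a,b), index_stats may hold rows for i1 with prefix_arity 1, 2 and 3,
  the last one describing the implicit extension (a,b,id). DROP INDEX i1
  calls this function with ext_prefixes_only=FALSE and removes all three
  rows, while dropping or redefining the primary key calls it with
  ext_prefixes_only=TRUE for each remaining index and removes only the
  arity-3 row, because the loop below starts at key_info->key_parts.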
+ + @retval + 0 If the deletion is successful + @retval + 1 Otherwise + + @note + The function is called when dropping an index, or dropping/changing the + definition of a column used in the definition of the index. +*/ + +int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info, + bool ext_prefixes_only) +{ + int err; + enum_binlog_format save_binlog_format; + TABLE *stat_table; + TABLE_LIST tables; + Open_tables_backup open_tables_backup; + int rc= 0; + + DBUG_ENTER("delete_statistics_for_index"); + + if (open_single_stat_table(thd, &tables, &stat_table_name[2], + &open_tables_backup, TRUE)) + { + thd->clear_error(); + DBUG_RETURN(rc); + } + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + stat_table= tables.table; + Index_stat index_stat(stat_table, tab); + if (!ext_prefixes_only) + { + index_stat.set_index_prefix_key_fields(key_info); + while (index_stat.find_next_stat_for_prefix(3)) + { + err= index_stat.delete_stat(); + if (err && !rc) + rc= 1; + } + } + else + { + for (uint i= key_info->key_parts; i < key_info->ext_key_parts; i++) + { + index_stat.set_key_fields(key_info, i+1); + if (index_stat.find_next_stat_for_prefix(4)) + { + err= index_stat.delete_stat(); + if (err && !rc) + rc= 1; + } + } + } + + thd->restore_stmt_binlog_format(save_binlog_format); + + close_system_tables(thd, &open_tables_backup); + + DBUG_RETURN(rc); +} + + +/** + @brief + Rename a table in all statistical tables + + @param + thd The thread handle + @param + db The name of the database the table belongs to + @param + tab The name of the table to be renamed in statistical tables + @param + new_tab The new name of the table + + @details + The function replaces the name of the table 'tab' from the database 'db' + for 'new_tab' in all all statistical tables: table_stats, column_stats, + index_stats. 
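  @note
  For example, ALTER TABLE db1.t1 RENAME TO db2.t2 passes ('db1','t1') and
  ('db2','t2') here; the loops below locate every index_stats, column_stats
  and table_stats row keyed by the old (db_name, table_name) pair and
  rewrite just those two key columns, so the statistics collected for the
  table survive the rename unchanged.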
+ + @retval + 0 If all updates of the table name are successful + @retval + 1 Otherwise + + @note + The function is called when executing any statement that renames a table +*/ + +int rename_table_in_stat_tables(THD *thd, LEX_STRING *db, LEX_STRING *tab, + LEX_STRING *new_db, LEX_STRING *new_tab) +{ + int err; + enum_binlog_format save_binlog_format; + TABLE *stat_table; + TABLE_LIST tables[STATISTICS_TABLES]; + Open_tables_backup open_tables_backup; + int rc= 0; + + DBUG_ENTER("rename_table_in_stat_tables"); + + if (open_stat_tables(thd, tables, &open_tables_backup, TRUE)) + { + thd->clear_error(); + DBUG_RETURN(rc); + } + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + /* Rename table in the statistical table index_stats */ + stat_table= tables[INDEX_STAT].table; + Index_stat index_stat(stat_table, db, tab); + index_stat.set_full_table_name(); + while (index_stat.find_next_stat_for_prefix(2)) + { + err= index_stat.update_table_name_key_parts(new_db, new_tab); + if (err & !rc) + rc= 1; + index_stat.set_full_table_name(); + } + + /* Rename table in the statistical table column_stats */ + stat_table= tables[COLUMN_STAT].table; + Column_stat column_stat(stat_table, db, tab); + column_stat.set_full_table_name(); + while (column_stat.find_next_stat_for_prefix(2)) + { + err= column_stat.update_table_name_key_parts(new_db, new_tab); + if (err & !rc) + rc= 1; + column_stat.set_full_table_name(); + } + + /* Rename table in the statistical table table_stats */ + stat_table= tables[TABLE_STAT].table; + Table_stat table_stat(stat_table, db, tab); + table_stat.set_key_fields(); + if (table_stat.find_stat()) + { + err= table_stat.update_table_name_key_parts(new_db, new_tab); + if (err & !rc) + rc= 1; + } + + thd->restore_stmt_binlog_format(save_binlog_format); + + close_system_tables(thd, &open_tables_backup); + + DBUG_RETURN(rc); +} + + +/** + @brief + Rename a column in the statistical table column_stats + + @param + thd The thread handle + @param + tab The table the column belongs to + @param + col The column to be renamed + @param + new_name The new column name + + @details + The function replaces the name of the column 'col' belonging to the table + 'tab' for 'new_name' in the statistical table column_stats. + + @retval + 0 If all updates of the table name are successful + @retval + 1 Otherwise + + @note + The function is called when executing any statement that renames a column, + but does not change the column definition. 
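  @note
  For example, if column a of table t1 is an INT, then
  ALTER TABLE t1 CHANGE COLUMN a a_new INT keeps the definition and ends up
  here, so only the column_name key part of the matching column_stats row
  is rewritten, whereas ALTER TABLE t1 MODIFY a BIGINT changes the
  definition and sql_table.cc calls delete_statistics_for_column() instead,
  since the stored min/max values and avg_frequency would no longer
  describe the redefined column.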
+*/ + +int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col, + const char *new_name) +{ + int err; + enum_binlog_format save_binlog_format; + TABLE *stat_table; + TABLE_LIST tables; + Open_tables_backup open_tables_backup; + int rc= 0; + + DBUG_ENTER("rename_column_in_stat_tables"); + + if (open_single_stat_table(thd, &tables, &stat_table_name[1], + &open_tables_backup, TRUE)) + { + thd->clear_error(); + DBUG_RETURN(rc); + } + + save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); + + /* Rename column in the statistical table table_stat */ + stat_table= tables.table; + Column_stat column_stat(stat_table, tab); + column_stat.set_key_fields(col); + if (column_stat.find_stat()) + { + err= column_stat.update_column_key_part(new_name); + if (err & !rc) + rc= 1; + } + + thd->restore_stmt_binlog_format(save_binlog_format); + + close_system_tables(thd, &open_tables_backup); + + DBUG_RETURN(rc); +} + + +/** + @brief + Set statistics for a table that will be used by the optimizer + + @param + thd The thread handle + @param + table The table to set statistics for + + @details + Depending on the value of thd->variables.use_stat_tables + the function performs the settings for the table that will control + from where the statistical data used by the optimizer will be taken. +*/ + +void set_statistics_for_table(THD *thd, TABLE *table) +{ + TABLE_STATISTICS_CB *stats_cb= &table->s->stats_cb; + Table_statistics *read_stats= stats_cb->table_stats; + Use_stat_tables_mode use_stat_table_mode= get_use_stat_tables_mode(thd); + table->used_stat_records= + (use_stat_table_mode <= COMPLEMENTARY || + !stats_cb->stats_is_read || read_stats->cardinality_is_null) ? + table->file->stats.records : read_stats->cardinality; + KEY *key_info, *key_info_end; + for (key_info= table->key_info, key_info_end= key_info+table->s->keys; + key_info < key_info_end; key_info++) + { + key_info->is_statistics_from_stat_tables= + (use_stat_table_mode > COMPLEMENTARY && + stats_cb->stats_is_read && + key_info->read_stats->avg_frequency_is_inited() && + key_info->read_stats->get_avg_frequency(0) > 0.5); + } +} diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h new file mode 100644 index 00000000000..17f22cec4e5 --- /dev/null +++ b/sql/sql_statistics.h @@ -0,0 +1,248 @@ +/* Copyright 2006-2008 MySQL AB, 2008 Sun Microsystems, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#ifndef SQL_STATISTICS_H +#define SQL_STATISTICS_H + +/* + These enumeration types comprise the dictionary of three + statistical tables table_stat, column_stat and index_stat + as they defined in ../scripts/mysql_system_tables.sql. + + It would be nice if the declarations of these types were + generated automatically by the table definitions. 
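  As reflected by the enum_*_col enumerations below, the layout is:

    table_stats  (db_name, table_name, cardinality)
    column_stats (db_name, table_name, column_name, min_value, max_value,
                  nulls_ratio, avg_length, avg_frequency)
    index_stats  (db_name, table_name, index_name, prefix_arity,
                  avg_frequency)

  where the leading name columns are what the Table_stat, Column_stat and
  Index_stat helpers in sql_statistics.cc use as the lookup key.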
+*/ + +typedef +enum enum_use_stat_tables_mode +{ + NEVER, + COMPLEMENTARY, + PEFERABLY, +} Use_stat_tables_mode; + +enum enum_stat_tables +{ + TABLE_STAT, + COLUMN_STAT, + INDEX_STAT, +}; + +enum enum_table_stat_col +{ + TABLE_STAT_DB_NAME, + TABLE_STAT_TABLE_NAME, + TABLE_STAT_CARDINALITY +}; + +enum enum_column_stat_col +{ + COLUMN_STAT_DB_NAME, + COLUMN_STAT_TABLE_NAME, + COLUMN_STAT_COLUMN_NAME, + COLUMN_STAT_MIN_VALUE, + COLUMN_STAT_MAX_VALUE, + COLUMN_STAT_NULLS_RATIO, + COLUMN_STAT_AVG_LENGTH, + COLUMN_STAT_AVG_FREQUENCY +}; + +enum enum_index_stat_col +{ + INDEX_STAT_DB_NAME, + INDEX_STAT_TABLE_NAME, + INDEX_STAT_INDEX_NAME, + INDEX_STAT_PREFIX_ARITY, + INDEX_STAT_AVG_FREQUENCY +}; + +inline +Use_stat_tables_mode get_use_stat_tables_mode(THD *thd) +{ + return (Use_stat_tables_mode) (thd->variables.use_stat_tables); +} + +int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables); +int collect_statistics_for_table(THD *thd, TABLE *table); +int alloc_statistics_for_table_share(THD* thd, TABLE_SHARE *share, + bool is_safe); +int alloc_statistics_for_table(THD *thd, TABLE *table); +int update_statistics_for_table(THD *thd, TABLE *table); +int delete_statistics_for_table(THD *thd, LEX_STRING *db, LEX_STRING *tab); +int delete_statistics_for_column(THD *thd, TABLE *tab, Field *col); +int delete_statistics_for_index(THD *thd, TABLE *tab, KEY *key_info, + bool ext_prefixes_only); +int rename_table_in_stat_tables(THD *thd, LEX_STRING *db, LEX_STRING *tab, + LEX_STRING *new_db, LEX_STRING *new_tab); +int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col, + const char *new_name); +void set_statistics_for_table(THD *thd, TABLE *table); + +class Columns_statistics; +class Index_statistics; + + +/* Statistical data on a table */ + +class Table_statistics +{ + +public: + my_bool cardinality_is_null; /* TRUE if the cardinality is unknown */ + ha_rows cardinality; /* Number of rows in the table */ + uchar *min_max_record_buffers; /* Record buffers for min/max values */ + Column_statistics *column_stats; /* Array of statistical data for columns */ + Index_statistics *index_stats; /* Array of statistical data for indexes */ + ulong *idx_avg_frequency; /* Array of records per key for index prefixes */ + +}; + + +/* Statistical data on a column */ + +class Column_statistics +{ + +private: + static const uint Scale_factor_nulls_ratio= 100000; + static const uint Scale_factor_avg_length= 100000; + static const uint Scale_factor_avg_frequency= 100000; + +public: + /* + Bitmap indicating what statistical characteristics + are available for the column + */ + uint32 column_stat_nulls; + + /* Minimum value for the column */ + Field *min_value; + /* Maximum value for the column */ + Field *max_value; + +private: + + /* + The ratio Z/N multiplied by the scale factor Scale_factor_nulls_ratio, + where + N is the total number of rows, + Z is the number of nulls in the column + */ + ulong nulls_ratio; + + /* + Average number of bytes occupied by the representation of a + value of the column in memory buffers such as join buffer + multiplied by the scale factor Scale_factor_avg_length. + CHAR values are stripped of trailing spaces. + Flexible values are stripped of their length prefixes. 
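    For example, a VARCHAR(255) column whose non-null values average
    11.5 bytes in such buffers is stored as
    11.5 * Scale_factor_avg_length = 1150000, and get_avg_length() below
    returns 11.5 again; the declared column length plays no role.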
+ */ + ulong avg_length; + + /* + The ratio N/D multiplied by the scale factor Scale_factor_avg_frequency, + where + N is the number of rows with not null value in the column, + D the number of distinct values among them + */ + ulong avg_frequency; + +public: + + void set_all_nulls() + { + column_stat_nulls= + ((1 << (COLUMN_STAT_AVG_FREQUENCY-COLUMN_STAT_COLUMN_NAME))-1) << + (COLUMN_STAT_COLUMN_NAME+1); + } + + void set_not_null(uint stat_field_no) + { + column_stat_nulls&= ~(1 << stat_field_no); + } + + bool is_null(uint stat_field_no) + { + return test(column_stat_nulls & (1 << stat_field_no)); + } + + double get_nulls_ratio() + { + return (double) nulls_ratio / Scale_factor_nulls_ratio; + } + + double get_avg_length() + { + return (double) avg_length / Scale_factor_avg_length; + } + + double get_avg_frequency() + { + return (double) avg_frequency / Scale_factor_avg_frequency; + } + + void set_nulls_ratio (double val) + { + nulls_ratio= (ulong) (val * Scale_factor_nulls_ratio); + } + + void set_avg_length (double val) + { + avg_length= (ulong) (val * Scale_factor_avg_length); + } + + void set_avg_frequency (double val) + { + avg_frequency= (ulong) (val * Scale_factor_avg_frequency); + } + +}; + + +/* Statistical data on an index prefixes */ + +class Index_statistics +{ + +private: + static const uint Scale_factor_avg_frequency= 100000; + /* + The k-th element of this array contains the ratio N/D + multiplied by the scale factor Scale_factor_avg_frequency, + where N is the number of index entries without nulls + in the first k components, and D is the number of distinct + k-component prefixes among them + */ + ulong *avg_frequency; + +public: + + void init_avg_frequency(ulong *ptr) { avg_frequency= ptr; } + + bool avg_frequency_is_inited() { return avg_frequency != NULL; } + + double get_avg_frequency(uint i) + { + return (double) avg_frequency[i] / Scale_factor_avg_frequency; + } + + void set_avg_frequency(uint i, double val) + { + avg_frequency[i]= (ulong) (val * Scale_factor_avg_frequency); + } + +}; + +#endif /* SQL_STATISTICS_H */ diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 6e99ebb0d37..31e4110c4b1 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -43,6 +43,7 @@ #include "discover.h" // readfrm #include "my_pthread.h" // pthread_mutex_t #include "log_event.h" // Query_log_event +#include "sql_statistics.h" #include <hash.h> #include <myisam.h> #include <my_dir.h> @@ -1890,6 +1891,17 @@ bool mysql_rm_table(THD *thd,TABLE_LIST *tables, my_bool if_exists, } } + if (!in_bootstrap) + { + for (table= tables; table; table= table->next_local) + { + LEX_STRING db_name= { table->db, table->db_length }; + LEX_STRING table_name= { table->table_name, table->table_name_length }; + if (table->open_type == OT_BASE_ONLY || !find_temporary_table(thd, table)) + (void) delete_statistics_for_table(thd, &db_name, &table_name); + } + } + mysql_ha_rm_tables(thd, tables); if (!drop_temporary) @@ -1900,6 +1912,7 @@ bool mysql_rm_table(THD *thd,TABLE_LIST *tables, my_bool if_exists, MYSQL_OPEN_SKIP_TEMPORARY)) DBUG_RETURN(true); for (table= tables; table; table= table->next_local) + tdc_remove_table(thd, TDC_RT_REMOVE_ALL, table->db, table->table_name, false); } @@ -4134,7 +4147,8 @@ bool mysql_create_table_no_lock(THD *thd, set_table_default_charset(thd, create_info, (char*) db); db_options= create_info->table_options; - if (create_info->row_type != ROW_TYPE_FIXED && + if (!create_info->frm_only && + create_info->row_type != ROW_TYPE_FIXED && create_info->row_type != ROW_TYPE_DEFAULT) 
db_options|= HA_OPTION_PACK_RECORD; alias= table_case_name(create_info, table_name); @@ -4561,7 +4575,8 @@ bool mysql_create_table(THD *thd, TABLE_LIST *create_table, */ if (open_and_lock_tables(thd, thd->lex->query_tables, FALSE, 0)) { - result= TRUE; + /* is_error() may be 0 if table existed and we generated a warning */ + result= thd->is_error(); goto end; } @@ -4771,7 +4786,10 @@ bool mysql_create_like_table(THD* thd, TABLE_LIST* table, TABLE_LIST* src_table, properly isolated from all concurrent operations which matter. */ if (open_tables(thd, &thd->lex->query_tables, ¬_used, 0)) + { + res= thd->is_error(); goto err; + } src_table->table->use_all_columns(); DEBUG_SYNC(thd, "create_table_like_after_open"); @@ -5171,6 +5189,21 @@ mysql_compare_tables(TABLE *table, thd->calloc(sizeof(void*) * table->s->keys)) == NULL) DBUG_RETURN(1); + tmp_new_field_it.init(tmp_alter_info.create_list); + for (i= 0, f_ptr= table->field, tmp_new_field= tmp_new_field_it++; + (field= *f_ptr); + i++, f_ptr++, tmp_new_field= tmp_new_field_it++) + { + if (field->is_equal(tmp_new_field) == IS_EQUAL_NO && + table->s->tmp_table == NO_TMP_TABLE) + (void) delete_statistics_for_column(thd, table, field); + else if (my_strcasecmp(system_charset_info, + field->field_name, + tmp_new_field->field_name)) + (void) rename_column_in_stat_tables(thd, table, field, + tmp_new_field->field_name); + } + /* Use transformed info to evaluate possibility of in-place ALTER TABLE but use the preserved field to persist modifications. @@ -5231,11 +5264,36 @@ mysql_compare_tables(TABLE *table, if (my_strcasecmp(system_charset_info, field->field_name, tmp_new_field->field_name)) - field->flags|= FIELD_IS_RENAMED; + { + field->flags|= FIELD_IS_RENAMED; + if (table->s->tmp_table == NO_TMP_TABLE) + rename_column_in_stat_tables(thd, table, field, + tmp_new_field->field_name); + } /* Evaluate changes bitmap and send to check_if_incompatible_data() */ if (!(tmp= field->is_equal(tmp_new_field))) { + if (table->s->tmp_table == NO_TMP_TABLE) + { + KEY *key_info= table->key_info; + for (uint i=0; i < table->s->keys; i++, key_info++) + { + if (field->part_of_key.is_set(i)) + { + uint key_parts= table->actual_n_key_parts(key_info); + for (uint j= 0; j < key_parts; j++) + { + if (key_info->key_part[j].fieldnr-1 == field->field_index) + { + (void) delete_statistics_for_index(thd, table, key_info, + j >= key_info->key_parts); + break; + } + } + } + } + } DBUG_PRINT("info", ("!field_is_equal('%s') -> ALTER_TABLE_DATA_CHANGED", new_field->field_name)); DBUG_RETURN(0); @@ -5334,6 +5392,21 @@ mysql_compare_tables(TABLE *table, field= table->field[key_part->fieldnr]; field->flags|= FIELD_IN_ADD_INDEX; } + if (table->s->tmp_table == NO_TMP_TABLE) + { + (void) delete_statistics_for_index(thd, table, table_key, FALSE); + if (table_key - table->key_info == table->s->primary_key) + { + KEY *tab_key_info= table->key_info; + for (uint j=0; j < table->s->keys; j++, tab_key_info++) + { + if (tab_key_info->key_parts != tab_key_info->ext_key_parts) + (void) delete_statistics_for_index(thd, table, tab_key_info, + TRUE); + } + } + } + DBUG_PRINT("info", ("index changed: '%s'", table_key->name)); } /*end of for (; table_key < table_key_end;) */ @@ -5535,6 +5608,7 @@ mysql_prepare_alter_table(THD *thd, TABLE *table, uint used_fields= create_info->used_fields; KEY *key_info=table->key_info; bool rc= TRUE; + bool modified_primary_key= FALSE; Create_field *def; Field **f_ptr,*field; DBUG_ENTER("mysql_prepare_alter_table"); @@ -5591,6 +5665,8 @@ mysql_prepare_alter_table(THD 
*thd, TABLE *table, } if (drop) { + if (table->s->tmp_table == NO_TMP_TABLE) + (void) delete_statistics_for_column(thd, table, field); drop_it.remove(); /* ALTER TABLE DROP COLUMN always changes table data even in cases @@ -5729,7 +5805,7 @@ mysql_prepare_alter_table(THD *thd, TABLE *table, Collect all keys which isn't in drop list. Add only those for which some fields exists. */ - + for (uint i=0 ; i < table->s->keys ; i++,key_info++) { char *key_name= key_info->name; @@ -5743,12 +5819,27 @@ mysql_prepare_alter_table(THD *thd, TABLE *table, } if (drop) { + if (table->s->tmp_table == NO_TMP_TABLE) + { + (void) delete_statistics_for_index(thd, table, key_info, FALSE); + if (i == table->s->primary_key) + { + KEY *tab_key_info= table->key_info; + for (uint j=0; j < table->s->keys; j++, tab_key_info++) + { + if (tab_key_info->key_parts != tab_key_info->ext_key_parts) + (void) delete_statistics_for_index(thd, table, tab_key_info, + TRUE); + } + } + } drop_it.remove(); continue; } KEY_PART_INFO *key_part= key_info->key_part; key_parts.empty(); + bool delete_index_stat= FALSE; for (uint j=0 ; j < key_info->key_parts ; j++,key_part++) { if (!key_part->field) @@ -5771,7 +5862,12 @@ mysql_prepare_alter_table(THD *thd, TABLE *table, break; } if (!cfield) + { + if (table->s->primary_key == i) + modified_primary_key= TRUE; + delete_index_stat= TRUE; continue; // Field is removed + } key_part_length= key_part->length; if (cfield->field) // Not new field { @@ -5813,6 +5909,15 @@ mysql_prepare_alter_table(THD *thd, TABLE *table, strlen(cfield->field_name), key_part_length)); } + if (table->s->tmp_table == NO_TMP_TABLE) + { + if (delete_index_stat) + (void) delete_statistics_for_index(thd, table, key_info, FALSE); + else if (modified_primary_key && + key_info->key_parts != key_info->ext_key_parts) + (void) delete_statistics_for_index(thd, table, key_info, TRUE); + } + if (key_parts.elements) { KEY_CREATE_INFO key_create_info; @@ -5992,6 +6097,9 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, enum ha_extra_function extra_func= thd->locked_tables_mode ? HA_EXTRA_NOT_USED : HA_EXTRA_FORCE_REOPEN; + LEX_STRING old_db_name= { table_list->db, table_list->db_length }; + LEX_STRING old_table_name= { table_list->table_name, + table_list->table_name_length }; DBUG_ENTER("mysql_alter_table"); /* @@ -6310,6 +6418,12 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, else { *fn_ext(new_name)=0; + + LEX_STRING new_db_name= { new_db, strlen(new_db) }; + LEX_STRING new_table_name= { new_alias, strlen(new_alias) }; + (void) rename_table_in_stat_tables(thd, &old_db_name, &old_table_name, + &new_db_name, &new_table_name); + if (mysql_rename_table(old_db_type,db,table_name,new_db,new_alias, 0)) error= -1; else if (Table_triggers_list::change_table_name(thd, db, @@ -6696,9 +6810,19 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, my_sleep(100000);); /* Create a table with a temporary name. - With create_info->frm_only == 1 this creates a .frm file only. + With create_info->frm_only == 1 this creates a .frm file only and + we keep the original row format. We don't log the statement, it will be logged later. 
*/ + if (need_copy_table == ALTER_TABLE_METADATA_ONLY) + { + DBUG_ASSERT(create_info->frm_only); + /* Ensure we keep the original table format */ + create_info->table_options= ((create_info->table_options & + ~HA_OPTION_PACK_RECORD) | + (table->s->db_create_options & + HA_OPTION_PACK_RECORD)); + } tmp_disable_binlog(thd); error= mysql_create_table_no_lock(thd, new_db, tmp_name, create_info, @@ -7053,6 +7177,15 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, table is renamed and the SE is also changed, then an intermediate table is created and the additional call will not take place. */ + + if (new_name != table_name || new_db != db) + { + LEX_STRING new_db_name= { new_db, strlen(new_db) }; + LEX_STRING new_table_name= { new_name, strlen(new_name) }; + (void) rename_table_in_stat_tables(thd, &old_db_name, &old_table_name, + &new_db_name, &new_table_name); + } + if (need_copy_table == ALTER_TABLE_METADATA_ONLY) { DBUG_ASSERT(new_db_type == old_db_type); @@ -7364,7 +7497,8 @@ copy_data_between_tables(THD *thd, TABLE *from,TABLE *to, thd->abort_on_warning= !ignore && thd->is_strict_mode(); from->file->info(HA_STATUS_VARIABLE); - to->file->ha_start_bulk_insert(from->file->stats.records); + to->file->ha_start_bulk_insert(from->file->stats.records, + ignore ? 0 : HA_CREATE_UNIQUE_INDEX_BY_SORT); errpos= 3; copy_end=copy; diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 4ed67c86eb1..066bc2c24f7 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -32,6 +32,7 @@ #include "sql_view.h" // check_key_in_view #include "sp_head.h" #include "sql_trigger.h" +#include "sql_statistics.h" #include "probes_mysql.h" #include "debug_sync.h" #include "key.h" // is_key_used @@ -393,6 +394,7 @@ int mysql_update(THD *thd, #endif /* Update the table->file->stats.records number */ table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); + set_statistics_for_table(thd, table); select= make_select(table, 0, 0, conds, 0, &error); if (error || !limit || thd->is_error() || diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index e9df67a7a05..ac904c990ed 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1716,7 +1716,12 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); show describe load alter optimize keycache preload flush reset purge begin commit rollback savepoint release slave master_def master_defs master_file_def slave_until_opts - repair analyze check start checksum + repair analyze + analyze_table_list analyze_table_elem_spec + opt_persistent_stat_clause persistent_stat_spec + persistent_column_stat_spec persistent_index_stat_spec + table_column_list table_index_list table_index_name + check start checksum field_list field_list_item field_spec kill column_def key_def keycache_list keycache_list_or_parts assign_to_keycache assign_to_keycache_parts @@ -7432,7 +7437,7 @@ analyze: /* Will be overriden during execution. 
*/ YYPS->m_lock_type= TL_UNLOCK; } - table_list + analyze_table_list { THD *thd= YYTHD; LEX* lex= thd->lex; @@ -7443,6 +7448,96 @@ analyze: } ; +analyze_table_list: + analyze_table_elem_spec + | analyze_table_list ',' analyze_table_elem_spec + ; + +analyze_table_elem_spec: + table_name opt_persistent_stat_clause + ; + +opt_persistent_stat_clause: + /* empty */ + {} + | PERSISTENT_SYM FOR_SYM persistent_stat_spec + { + THD *thd= YYTHD; + thd->lex->with_persistent_for_clause= TRUE; + } + ; + +persistent_stat_spec: + ALL + {} + | COLUMNS persistent_column_stat_spec INDEXES persistent_index_stat_spec + {} + +persistent_column_stat_spec: + ALL {} + | '(' + { + THD *thd= YYTHD; + LEX* lex= thd->lex; + lex->column_list= new List<LEX_STRING>; + if (lex->column_list == NULL) + MYSQL_YYABORT; + } + table_column_list + ')' + ; + +persistent_index_stat_spec: + ALL {} + | '(' + { + THD *thd= YYTHD; + LEX* lex= thd->lex; + lex->index_list= new List<LEX_STRING>; + if (lex->index_list == NULL) + MYSQL_YYABORT; + } + table_index_list + ')' + ; + +table_column_list: + /* empty */ + {} + | ident + { + Lex->column_list->push_back((LEX_STRING*) + sql_memdup(&$1, sizeof(LEX_STRING))); + } + | table_column_list ',' ident + { + Lex->column_list->push_back((LEX_STRING*) + sql_memdup(&$3, sizeof(LEX_STRING))); + } + ; + +table_index_list: + /* empty */ + {} + | table_index_name + | table_index_list ',' table_index_name + ; + +table_index_name: + ident + { + Lex->index_list->push_back( + (LEX_STRING*) sql_memdup(&$1, sizeof(LEX_STRING))); + } + | + PRIMARY_SYM + { + LEX_STRING str= {(char*) "PRIMARY", 7}; + Lex->index_list->push_back( + (LEX_STRING*) sql_memdup(&str, sizeof(LEX_STRING))); + } + ; + binlog_base64_event: BINLOG_SYM TEXT_STRING_sys { diff --git a/sql/structs.h b/sql/structs.h index ae71819ae09..a3a54c524e6 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -29,6 +29,7 @@ struct TABLE; class Field; +class Index_statistics; class THD; @@ -96,6 +97,11 @@ typedef struct st_key { uint block_size; uint name_length; enum ha_key_alg algorithm; + /* + The flag is on if statistical data for the index prefixes + has to be taken from the system statistical tables. + */ + bool is_statistics_from_stat_tables; /* Note that parser is used when the table is opened for use, and parser_name is used when the table is being created. @@ -115,6 +121,18 @@ typedef struct st_key { For temporary heap tables this member is NULL. 
*/ ulong *rec_per_key; + + /* + This structure is used for statistical data on the index + that has been read from the statistical table index_stat + */ + Index_statistics *read_stats; + /* + This structure is used for statistical data on the index that + is collected by the function collect_statistics_for_table + */ + Index_statistics *collected_stats; + union { int bdb_return_if_eq; } handler; @@ -123,6 +141,9 @@ typedef struct st_key { /** reference to the list of options or NULL */ engine_option_value *option_list; ha_index_option_struct *option_struct; /* structure with parsed options */ + + double actual_rec_per_key(uint i); + } KEY; diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 55b225f32d2..c585f6681f0 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -3992,6 +3992,15 @@ static Sys_var_ulong Sys_progress_report_time( SESSION_VAR(progress_report_time), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, UINT_MAX), DEFAULT(56), BLOCK_SIZE(1)); +const char *use_stat_tables_modes[] = + {"NEVER", "COMPLEMENTARY", "PREFERABLY", 0}; +static Sys_var_enum Sys_optimizer_use_stat_tables( + "use_stat_tables", + "Specifies how to use system statistics tables. Possible values are " + "NEVER, COMPLEMENTARY, PREVERABLY", + SESSION_VAR(use_stat_tables), CMD_LINE(REQUIRED_ARG), + use_stat_tables_modes, DEFAULT(0)); + static Sys_var_mybool Sys_no_thread_alarm( "debug_no_thread_alarm", "Disable system thread alarm calls. Disabling it may be useful " diff --git a/sql/table.cc b/sql/table.cc index 9954d91cab4..22d4eed1b12 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -39,6 +39,7 @@ #include "my_bit.h" #include "sql_select.h" #include "sql_derived.h" +#include "sql_statistics.h" #include "mdl.h" // MDL_wait_for_graph_visitor /* INFORMATION_SCHEMA name */ @@ -339,6 +340,8 @@ TABLE_SHARE *alloc_table_share(TABLE_LIST *table_list, char *key, share->free_tables.empty(); share->m_flush_tickets.empty(); + init_sql_alloc(&share->stats_cb.mem_root, TABLE_ALLOC_BLOCK_SIZE, 0); + memcpy((char*) &share->mem_root, (char*) &mem_root, sizeof(mem_root)); mysql_mutex_init(key_TABLE_SHARE_LOCK_ha_data, &share->LOCK_ha_data, MY_MUTEX_INIT_FAST); @@ -419,6 +422,14 @@ void TABLE_SHARE::destroy() uint idx; KEY *info_it; + if (tmp_table == NO_TMP_TABLE) + mysql_mutex_lock(&LOCK_ha_data); + free_root(&stats_cb.mem_root, MYF(0)); + stats_cb.stats_can_be_read= FALSE; + stats_cb.stats_is_read= FALSE; + if (tmp_table == NO_TMP_TABLE) + mysql_mutex_unlock(&LOCK_ha_data); + /* The mutex is initialized only for shares that are part of the TDC */ if (tmp_table == NO_TMP_TABLE) mysql_mutex_destroy(&LOCK_ha_data); @@ -544,6 +555,13 @@ inline bool is_system_table_name(const char *name, uint length) my_tolower(ci, name[2]) == 'm' && my_tolower(ci, name[3]) == 'e') || + /* one of mysql.*_stat tables */ + (my_tolower(ci, name[length-5]) == 's' && + my_tolower(ci, name[length-4]) == 't' && + my_tolower(ci, name[length-3]) == 'a' && + my_tolower(ci, name[length-2]) == 't' && + my_tolower(ci, name[length-1]) == 's') || + /* mysql.event table */ (my_tolower(ci, name[0]) == 'e' && my_tolower(ci, name[1]) == 'v' && @@ -753,7 +771,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head, uchar forminfo[288]; uchar *record; uchar *disk_buff, *strpos, *null_flags, *null_pos; - ulong pos, record_offset; + ulong pos, record_offset; ulong *rec_per_key= NULL; ulong rec_buff_length; handler *handler_file= 0; @@ -6033,6 +6051,7 @@ bool TABLE::add_tmp_key(uint key, uint key_parts, keyinfo->algorithm= HA_KEY_ALG_UNDEF; keyinfo->flags= 
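/*
  For illustration only, not part of this patch: the use_stat_tables
  variable declared in sys_vars.cc above maps the enum values 0/1/2 to
  NEVER, COMPLEMENTARY and PREFERABLY, with DEFAULT(0) meaning NEVER.
  A rough sketch of how a caller might gate access to the cached
  statistics -- the real decision logic lives in sql_statistics.cc,
  which is outside these hunks, so the condition below is only an
  assumption (thd, table and rows are placeholder names):

      if (thd->variables.use_stat_tables != 0 &&   // anything but NEVER
          table->s->stats_cb.stats_is_read)
        rows= table->stat_records();
      else
        rows= table->file->stats.records;
*/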
HA_GENERATED_KEY; keyinfo->ext_key_flags= keyinfo->flags; + keyinfo->is_statistics_from_stat_tables= FALSE; if (unique) keyinfo->flags|= HA_NOSAME; sprintf(buf, "key%i", key); @@ -6043,6 +6062,8 @@ bool TABLE::add_tmp_key(uint key, uint key_parts, if (!keyinfo->rec_per_key) return TRUE; bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts); + keyinfo->read_stats= NULL; + keyinfo->collected_stats= NULL; for (i= 0; i < key_parts; i++) { @@ -6786,6 +6807,7 @@ int TABLE_LIST::fetch_number_of_rows() { table->file->stats.records= ((select_union*)derived->result)->records; set_if_bigger(table->file->stats.records, 2); + table->used_stat_records= table->file->stats.records; } else error= table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); @@ -6881,3 +6903,12 @@ uint TABLE_SHARE::actual_n_key_parts(THD *thd) ext_key_parts : key_parts; } + +double KEY::actual_rec_per_key(uint i) +{ + if (rec_per_key == 0) + return 0; + return (is_statistics_from_stat_tables ? + read_stats->get_avg_frequency(i) : (double) rec_per_key[i]); +} + diff --git a/sql/table.h b/sql/table.h index a43f729ba5c..ecad83cff22 100644 --- a/sql/table.h +++ b/sql/table.h @@ -46,6 +46,7 @@ struct TABLE_LIST; class ACL_internal_schema_access; class ACL_internal_table_access; class Field; +class Table_statistics; /* Used to identify NESTED_JOIN structures within a join (applicable only to @@ -562,6 +563,21 @@ typedef I_P_List <Wait_for_flush, /** + Control block to access table statistics loaded + from persistent statistical tables +*/ + +struct TABLE_STATISTICS_CB +{ + MEM_ROOT mem_root; /* MEM_ROOT to allocate statistical data for the table */ + Table_statistics *table_stats; /* Structure to access the statistical data */ + bool stats_can_be_read; /* Memory for statistical data is allocated */ + bool stats_is_read; /* Statistical data for table has been read + from statistical tables */ +}; + + +/** This structure is shared between different table objects. There is one instance of table share per one table in the database. */ @@ -598,6 +614,8 @@ struct TABLE_SHARE KEY *key_info; /* data of keys in database */ uint *blob_field; /* Index to blobs in Field arrray*/ + TABLE_STATISTICS_CB stats_cb; + uchar *default_values; /* row with default values */ LEX_STRING comment; /* Comment about table */ CHARSET_INFO *table_charset; /* Default charset of string fields */ @@ -1029,6 +1047,15 @@ public: */ query_id_t query_id; + /* + This structure is used for statistical data on the table that + is collected by the function collect_statistics_for_table + */ + Table_statistics *collected_stats; + + /* The estimate of the number of records in the table used by optimizer */ + ha_rows used_stat_records; + /* For each key that has quick_keys.is_set(key) == TRUE: estimate of #records and max #key parts that range access would use. @@ -1275,6 +1302,7 @@ public: uint actual_n_key_parts(KEY *keyinfo); ulong actual_key_flags(KEY *keyinfo); int update_default_fields(); + inline ha_rows stat_records() { return used_stat_records; } }; diff --git a/sql/threadpool_common.cc b/sql/threadpool_common.cc index 6b956768287..147a59df9b7 100644 --- a/sql/threadpool_common.cc +++ b/sql/threadpool_common.cc @@ -173,7 +173,6 @@ void threadpool_remove_connection(THD *thd) close_connection(thd, 0); unlink_thd(thd); - mysql_mutex_unlock(&LOCK_thread_count); mysql_cond_broadcast(&COND_thread_count); /* |
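    Stepping back from this threadpool hunk for a moment: taken together, the
    table.h and table.cc changes above let table-level code prefer the
    statistics-aware accessors over the raw handler numbers.  A minimal
    sketch, for illustration only (rows is an invented name, and which call
    sites are actually converted is outside the hunks shown here):

        table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
        set_statistics_for_table(thd, table);   // as mysql_update() now does
        ha_rows rows= table->stat_records();    // used_stat_records, presumably
                                                // refreshed by the call above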