From 072b39da66d9c1693ca832eb97f2d63b238cc412 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Sat, 30 Jan 2021 22:36:51 +0530 Subject: MDEV-22583: Selectivity for BIT columns in filtered column for EXPLAIN is incorrect For BIT columns when EITS is collected, we store the integral value in text representation in the min and max fields of the statistical table When this value is retrieved from the statistical table to original table field then we try to store the text representation in the original field which is INCORRECT. The value that is retrieved should be converted to integral type and that value should be stored back in the original field. This would get us the correct estimate for selectivity of the predicate. --- sql/sql_statistics.cc | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'sql/sql_statistics.cc') diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index b63172045e6..39fcfd7e6db 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1154,16 +1154,30 @@ public: switch (i) { case COLUMN_STAT_MIN_VALUE: - table_field->read_stats->min_value->set_notnull(); - stat_field->val_str(&val); - table_field->read_stats->min_value->store(val.ptr(), val.length(), - &my_charset_bin); + table_field->read_stats->min_value->set_notnull(); + if (table_field->type() == MYSQL_TYPE_BIT) + table_field->read_stats->min_value->store(stat_field->val_int(), + true); + else + { + stat_field->val_str(&val); + table_field->read_stats->min_value->store(val.ptr(), + val.length(), + &my_charset_bin); + } break; case COLUMN_STAT_MAX_VALUE: - table_field->read_stats->max_value->set_notnull(); - stat_field->val_str(&val); - table_field->read_stats->max_value->store(val.ptr(), val.length(), - &my_charset_bin); + table_field->read_stats->max_value->set_notnull(); + if (table_field->type() == MYSQL_TYPE_BIT) + table_field->read_stats->max_value->store(stat_field->val_int(), + true); + else + { + stat_field->val_str(&val); + table_field->read_stats->max_value->store(val.ptr(), + val.length(), + &my_charset_bin); + } break; case COLUMN_STAT_NULLS_RATIO: table_field->read_stats->set_nulls_ratio(stat_field->val_real()); -- cgit v1.2.1 From a461e4d306bc53134cefa0eeeb624f3d9eba70f8 Mon Sep 17 00:00:00 2001 From: Varun Gupta Date: Tue, 9 Feb 2021 20:27:21 +0530 Subject: MDEV-19474: Histogram statistics are used even with optimizer_use_condition_selectivity=3 The issue here was histogram statistics were being used even when the level of optimizer_use_condition_selectivity doesn't allow usage of statistics from histogram. The histogram statistics are read for a table only when optimizer_use_condition_selectivity > 3. But the TABLE structure can be stored in the internal table cache and be reused for the next query. So in this case the histogram statistics will be available for the next query. The fix would be to make sure to use the histogram statistics only when optimizer_use_condition_selectivity > 3. --- sql/sql_statistics.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'sql/sql_statistics.cc') diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 39fcfd7e6db..f2aa8b8063e 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -3732,6 +3732,7 @@ double get_column_range_cardinality(Field *field, if (!table->stats_is_read) return tab_records; + THD *thd= table->in_use; double col_nulls= tab_records * col_stats->get_nulls_ratio(); double col_non_nulls= tab_records - col_nulls; @@ -3762,7 +3763,7 @@ double get_column_range_cardinality(Field *field, col_stats->min_max_values_are_provided()) { Histogram *hist= &col_stats->histogram; - if (hist->is_available()) + if (hist->is_usable(thd)) { store_key_image_to_rec(field, (uchar *) min_endp->key, field->key_length()); @@ -3806,10 +3807,10 @@ double get_column_range_cardinality(Field *field, max_mp_pos= 1.0; Histogram *hist= &col_stats->histogram; - if (!hist->is_available()) - sel= (max_mp_pos - min_mp_pos); - else + if (hist->is_usable(thd)) sel= hist->range_selectivity(min_mp_pos, max_mp_pos); + else + sel= (max_mp_pos - min_mp_pos); res= col_non_nulls * sel; set_if_bigger(res, col_stats->get_avg_frequency()); } -- cgit v1.2.1 From caad32ca927fa583b292a9d1d77f091affecfc5a Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 1 Feb 2021 12:57:14 +0100 Subject: Revert "MDEV-23753: SIGSEGV in Column_stat::store_stat_fields" This reverts the commit 3b94309a6c but keeps the test Because the fix is a hack that isn't supposed to do anything, and relies on a side-effect of rnd_init inside ha_partition. A different fix is coming up. --- sql/sql_statistics.cc | 4 ---- 1 file changed, 4 deletions(-) (limited to 'sql/sql_statistics.cc') diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index f2aa8b8063e..b7116f701ae 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -2124,10 +2124,6 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) ulonglong *idx_avg_frequency= (ulonglong*) alloc_root(&table->mem_root, sizeof(ulonglong) * key_parts); - if (table->file->ha_rnd_init(TRUE)) - DBUG_RETURN(1); - table->file->ha_rnd_end(); - uint columns= 0; for (field_ptr= table->field; *field_ptr; field_ptr++) { -- cgit v1.2.1 From 06a791aa129d80762d6e999a8d05940b52eda16c Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 1 Feb 2021 13:01:12 +0100 Subject: MDEV-23753: SIGSEGV in Column_stat::store_stat_fields only collect persistent stats for columns explicitly listed by the user in the ANALYZE TABLE PERSISTENT FOR COLUMNS (...) clause. The engine can extend table->read_set as much as it wants, it should not affect the collected statistics. Test case from the 3b94309a6c applies - it used to crash, because ha_partition extended table->read_set after the loop that initialized some objects based on bits in the read_set but before the loop that used these objects based on bits in the read_set. --- sql/sql_statistics.cc | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'sql/sql_statistics.cc') diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index b7116f701ae..b9d70a1db67 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -2155,15 +2155,13 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) for (field_ptr= table->field; *field_ptr; field_ptr++, column_stats++) { - (*field_ptr)->collected_stats= column_stats; - (*field_ptr)->collected_stats->max_value= NULL; - (*field_ptr)->collected_stats->min_value= NULL; if (bitmap_is_set(table->read_set, (*field_ptr)->field_index)) { column_stats->histogram.set_size(hist_size); column_stats->histogram.set_type(hist_type); column_stats->histogram.set_values(histogram); histogram+= hist_size; + (*field_ptr)->collected_stats= column_stats; } } @@ -2612,7 +2610,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table) for (field_ptr= table->field; *field_ptr; field_ptr++) { table_field= *field_ptr; - if (!bitmap_is_set(table->read_set, table_field->field_index)) + if (!table_field->collected_stats) continue; table_field->collected_stats->init(thd, table_field); } @@ -2639,7 +2637,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table) for (field_ptr= table->field; *field_ptr; field_ptr++) { table_field= *field_ptr; - if (!bitmap_is_set(table->read_set, table_field->field_index)) + if (!table_field->collected_stats) continue; if ((rc= table_field->collected_stats->add(rows))) break; @@ -2667,7 +2665,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table) for (field_ptr= table->field; *field_ptr; field_ptr++) { table_field= *field_ptr; - if (!bitmap_is_set(table->read_set, table_field->field_index)) + if (!table_field->collected_stats) continue; bitmap_set_bit(table->write_set, table_field->field_index); if (!rc) @@ -2771,7 +2769,7 @@ int update_statistics_for_table(THD *thd, TABLE *table) for (Field **field_ptr= table->field; *field_ptr; field_ptr++) { Field *table_field= *field_ptr; - if (!bitmap_is_set(table->read_set, table_field->field_index)) + if (!table_field->collected_stats) continue; restore_record(stat_table, s->default_values); column_stat.set_key_fields(table_field); -- cgit v1.2.1 From c4f0133444d2a867ee79f5c16b9d7829a05861cd Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 1 Feb 2021 13:16:40 +0100 Subject: cleanup: stat tables don't allocate Column_statistics_collected objects that won't be used. minor style fixes (StringBuffer<>, etc) --- sql/sql_statistics.cc | 54 +++++++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 30 deletions(-) (limited to 'sql/sql_statistics.cc') diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index b9d70a1db67..59c6423e388 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1042,15 +1042,15 @@ public: void store_stat_fields() { - char buff[MAX_FIELD_WIDTH]; - String val(buff, sizeof(buff), &my_charset_bin); + StringBuffer val; MY_BITMAP *old_map= dbug_tmp_use_all_columns(stat_table, &stat_table->read_set); for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HISTOGRAM; i++) { Field *stat_field= stat_table->field[i]; - if (table_field->collected_stats->is_null(i)) + Column_statistics *stats= table_field->collected_stats; + if (stats->is_null(i)) stat_field->set_null(); else { @@ -1058,10 +1058,10 @@ public: switch (i) { case COLUMN_STAT_MIN_VALUE: if (table_field->type() == MYSQL_TYPE_BIT) - stat_field->store(table_field->collected_stats->min_value->val_int(),true); + stat_field->store(stats->min_value->val_int(),true); else { - table_field->collected_stats->min_value->val_str(&val); + stats->min_value->val_str(&val); uint32 length= Well_formed_prefix(val.charset(), val.ptr(), MY_MIN(val.length(), stat_field->field_length)).length(); stat_field->store(val.ptr(), length, &my_charset_bin); @@ -1069,37 +1069,33 @@ public: break; case COLUMN_STAT_MAX_VALUE: if (table_field->type() == MYSQL_TYPE_BIT) - stat_field->store(table_field->collected_stats->max_value->val_int(),true); + stat_field->store(stats->max_value->val_int(),true); else { - table_field->collected_stats->max_value->val_str(&val); + stats->max_value->val_str(&val); uint32 length= Well_formed_prefix(val.charset(), val.ptr(), MY_MIN(val.length(), stat_field->field_length)).length(); stat_field->store(val.ptr(), length, &my_charset_bin); } break; case COLUMN_STAT_NULLS_RATIO: - stat_field->store(table_field->collected_stats->get_nulls_ratio()); + stat_field->store(stats->get_nulls_ratio()); break; case COLUMN_STAT_AVG_LENGTH: - stat_field->store(table_field->collected_stats->get_avg_length()); + stat_field->store(stats->get_avg_length()); break; case COLUMN_STAT_AVG_FREQUENCY: - stat_field->store(table_field->collected_stats->get_avg_frequency()); + stat_field->store(stats->get_avg_frequency()); break; case COLUMN_STAT_HIST_SIZE: - stat_field->store(table_field->collected_stats->histogram.get_size()); + stat_field->store(stats->histogram.get_size()); break; case COLUMN_STAT_HIST_TYPE: - stat_field->store(table_field->collected_stats->histogram.get_type() + - 1); + stat_field->store(stats->histogram.get_type() + 1); break; case COLUMN_STAT_HISTOGRAM: - const char * col_histogram= - (const char *) (table_field->collected_stats->histogram.get_values()); - stat_field->store(col_histogram, - table_field->collected_stats->histogram.get_size(), - &my_charset_bin); + stat_field->store((char *)stats->histogram.get_values(), + stats->histogram.get_size(), &my_charset_bin); break; } } @@ -2100,20 +2096,24 @@ void create_min_max_statistical_fields_for_table_share(THD *thd, int alloc_statistics_for_table(THD* thd, TABLE *table) { Field **field_ptr; - uint fields; DBUG_ENTER("alloc_statistics_for_table"); + uint columns= 0; + for (field_ptr= table->field; *field_ptr; field_ptr++) + { + if (bitmap_is_set(table->read_set, (*field_ptr)->field_index)) + columns++; + } Table_statistics *table_stats= (Table_statistics *) alloc_root(&table->mem_root, sizeof(Table_statistics)); - fields= table->s->fields ; Column_statistics_collected *column_stats= (Column_statistics_collected *) alloc_root(&table->mem_root, sizeof(Column_statistics_collected) * - (fields+1)); + columns); uint keys= table->s->keys; Index_statistics *index_stats= @@ -2124,12 +2124,6 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) ulonglong *idx_avg_frequency= (ulonglong*) alloc_root(&table->mem_root, sizeof(ulonglong) * key_parts); - uint columns= 0; - for (field_ptr= table->field; *field_ptr; field_ptr++) - { - if (bitmap_is_set(table->read_set, (*field_ptr)->field_index)) - columns++; - } uint hist_size= thd->variables.histogram_size; Histogram_type hist_type= (Histogram_type) (thd->variables.histogram_type); uchar *histogram= NULL; @@ -2151,9 +2145,9 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) table_stats->idx_avg_frequency= idx_avg_frequency; table_stats->histograms= histogram; - memset(column_stats, 0, sizeof(Column_statistics) * (fields+1)); + memset(column_stats, 0, sizeof(Column_statistics) * columns); - for (field_ptr= table->field; *field_ptr; field_ptr++, column_stats++) + for (field_ptr= table->field; *field_ptr; field_ptr++) { if (bitmap_is_set(table->read_set, (*field_ptr)->field_index)) { @@ -2161,7 +2155,7 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) column_stats->histogram.set_type(hist_type); column_stats->histogram.set_values(histogram); histogram+= hist_size; - (*field_ptr)->collected_stats= column_stats; + (*field_ptr)->collected_stats= column_stats++; } } -- cgit v1.2.1