diff options
author | Sergei Petrunia <psergey@askmonty.org> | 2022-01-19 18:35:27 +0300 |
---|---|---|
committer | Sergei Petrunia <psergey@askmonty.org> | 2022-01-19 18:35:27 +0300 |
commit | da78030ec87e30d2218a0de182ffb87fc541da97 (patch) | |
tree | 7f7ce9bb818d56679eaf034b52f05738b8d83da1 /sql/sql_statistics.cc | |
parent | e222e44d1bfc995870430bb90d8ac97e91f66cb4 (diff) | |
parent | ce4956f3229a8b2c26a4913fdbc190b5c822cb8e (diff) | |
download | mariadb-git-preview-10.8-MDEV-26519-json-histograms.tar.gz |
Merge MDEV-26519: JSON_HB histograms into 10.8 [preview-10.8-MDEV-26519-json-histograms]
Diffstat (limited to 'sql/sql_statistics.cc')
-rw-r--r-- | sql/sql_statistics.cc | 490 |
1 files changed, 325 insertions, 165 deletions
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 1f034f490c8..84d0902193b 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -28,11 +28,15 @@ #include "sql_base.h" #include "key.h" #include "sql_statistics.h" +#include "opt_histogram_json.h" #include "opt_range.h" #include "uniques.h" #include "sql_show.h" #include "sql_partition.h" +#include <vector> +#include <string> + /* The system variable 'use_stat_tables' can take one of the following values: @@ -57,8 +61,11 @@ the collected statistics in the persistent statistical tables only when the value of the variable 'use_stat_tables' is not equal to "never". -*/ - +*/ + +Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type, + THD *owner); + /* Currently there are only 3 persistent statistical tables */ static const uint STATISTICS_TABLES= 3; @@ -178,12 +185,12 @@ TABLE_FIELD_TYPE column_stat_fields[COLUMN_STAT_N_FIELDS] = }, { { STRING_WITH_LEN("hist_type") }, - { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB')") }, + { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB')") }, { STRING_WITH_LEN("utf8mb3") } }, { { STRING_WITH_LEN("histogram") }, - { STRING_WITH_LEN("varbinary(255)") }, + { STRING_WITH_LEN("longblob") }, { NULL, 0 } } }; @@ -307,7 +314,7 @@ public: inline void init(THD *thd, Field * table_field); inline bool add(); - inline void finish(ha_rows rows, double sample_fraction); + inline bool finish(MEM_ROOT *mem_root, ha_rows rows, double sample_fraction); inline void cleanup(); }; @@ -1064,15 +1071,23 @@ public: stat_field->store(stats->get_avg_frequency()); break; case COLUMN_STAT_HIST_SIZE: - stat_field->store(stats->histogram.get_size()); + // Note: this is dumb. the histogram size is stored with the + // histogram! + stat_field->store(stats->histogram? 
+ stats->histogram->get_size() : 0); break; case COLUMN_STAT_HIST_TYPE: - stat_field->store(stats->histogram.get_type() + 1); + if (stats->histogram) + stat_field->store(stats->histogram->get_type() + 1); + else + stat_field->set_null(); break; case COLUMN_STAT_HISTOGRAM: - stat_field->store((char *)stats->histogram.get_values(), - stats->histogram.get_size(), &my_charset_bin); - break; + if (stats->histogram) + stats->histogram->serialize(stat_field); + else + stat_field->set_null(); + break; } } } @@ -1100,6 +1115,8 @@ public: void get_stat_values() { table_field->read_stats->set_all_nulls(); + // default: hist_type=NULL means there's no histogram + table_field->read_stats->histogram_type_on_disk= INVALID_HISTOGRAM; if (table_field->read_stats->min_value) table_field->read_stats->min_value->set_null(); @@ -1111,7 +1128,7 @@ public: char buff[MAX_FIELD_WIDTH]; String val(buff, sizeof(buff), &my_charset_bin); - for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HIST_TYPE; i++) + for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HISTOGRAM; i++) { Field *stat_field= stat_table->field[i]; @@ -1155,13 +1172,28 @@ public: table_field->read_stats->set_avg_frequency(stat_field->val_real()); break; case COLUMN_STAT_HIST_SIZE: - table_field->read_stats->histogram.set_size(stat_field->val_int()); + /* + Ignore the contents of mysql.column_stats.hist_size. We take the + size from the mysql.column_stats.histogram column, itself. + */ break; case COLUMN_STAT_HIST_TYPE: - Histogram_type hist_type= (Histogram_type) (stat_field->val_int() - - 1); - table_field->read_stats->histogram.set_type(hist_type); - break; + { + /* + Save the histogram type. The histogram itself will be read in + read_histograms_for_table(). 
+ */ + Histogram_type hist_type= (Histogram_type) (stat_field->val_int() - + 1); + table_field->read_stats->histogram_type_on_disk= hist_type; + break; + } + case COLUMN_STAT_HISTOGRAM: + /* + Do nothing here: we take the histogram length from the 'histogram' + column itself + */ + break; } } } @@ -1182,9 +1214,9 @@ public: The method assumes that the value of histogram size and the pointer to the histogram location has been already set in the fields size and values of read_stats->histogram. - */ + */ - void get_histogram_value() + Histogram_base * load_histogram(MEM_ROOT *mem_root) { if (find_stat()) { @@ -1194,14 +1226,60 @@ public: Field *stat_field= stat_table->field[fldno]; table_field->read_stats->set_not_null(fldno); stat_field->val_str(&val); - memcpy(table_field->read_stats->histogram.get_values(), - val.ptr(), table_field->read_stats->histogram.get_size()); + Histogram_type hist_type= + table_field->read_stats->histogram_type_on_disk; + + Histogram_base *hist; + if (!(hist= create_histogram(mem_root, hist_type, NULL))) + return NULL; + Field *field= table->field[table_field->field_index]; + if (!hist->parse(mem_root, db_name->str, table_name->str, + field, hist_type, + val.ptr(), val.length())) + { + table_field->read_stats->histogram= hist; + return hist; + } + else + delete hist; } + return NULL; } - }; +bool Histogram_binary::parse(MEM_ROOT *mem_root, const char*, const char*, + Field*, Histogram_type type_arg, + const char *hist_data, size_t hist_data_len) +{ + /* On-disk an in-memory formats are the same. Just copy the data. */ + type= type_arg; + size= (uint8) hist_data_len; // 'size' holds the size of histogram in bytes + if (!(values= (uchar*)alloc_root(mem_root, hist_data_len))) + return true; + + memcpy(values, hist_data, hist_data_len); + return false; +} + +/* + Save the histogram data info a table field. 
+*/ +void Histogram_binary::serialize(Field *field) +{ + field->store((char*)values, size, &my_charset_bin); +} + +void Histogram_binary::init_for_collection(MEM_ROOT *mem_root, + Histogram_type htype_arg, + ulonglong size_arg) +{ + type= htype_arg; + values= (uchar*)alloc_root(mem_root, (size_t)size_arg); + size= (uint8) size_arg; +} + + /* An object of the class Index_stat is created to read statistical data on tables from the statistical table table_stat, to update @@ -1512,62 +1590,39 @@ public: } }; -/* - Histogram_builder is a helper class that is used to build histograms - for columns -*/ - -class Histogram_builder +class Histogram_binary_builder : public Histogram_builder { - Field *column; /* table field for which the histogram is built */ - uint col_length; /* size of this field */ - ha_rows records; /* number of records the histogram is built for */ Field *min_value; /* pointer to the minimal value for the field */ Field *max_value; /* pointer to the maximal value for the field */ - Histogram *histogram; /* the histogram location */ + Histogram_binary *histogram; /* the histogram location */ uint hist_width; /* the number of points in the histogram */ double bucket_capacity; /* number of rows in a bucket of the histogram */ uint curr_bucket; /* number of the current bucket to be built */ - ulonglong count; /* number of values retrieved */ - ulonglong count_distinct; /* number of distinct values retrieved */ - /* number of distinct values that occured only once */ - ulonglong count_distinct_single_occurence; -public: - Histogram_builder(Field *col, uint col_len, ha_rows rows) - : column(col), col_length(col_len), records(rows) +public: + Histogram_binary_builder(Field *col, uint col_len, ha_rows rows) + : Histogram_builder(col, col_len, rows) { Column_statistics *col_stats= col->collected_stats; min_value= col_stats->min_value; max_value= col_stats->max_value; - histogram= &col_stats->histogram; + histogram= (Histogram_binary*)col_stats->histogram; 
hist_width= histogram->get_width(); bucket_capacity= (double) records / (hist_width + 1); curr_bucket= 0; - count= 0; - count_distinct= 0; - count_distinct_single_occurence= 0; } - ulonglong get_count_distinct() const { return count_distinct; } - ulonglong get_count_single_occurence() const + int next(void *elem, element_count elem_cnt) override { - return count_distinct_single_occurence; - } - - int next(void *elem, element_count elem_cnt) - { - count_distinct++; - if (elem_cnt == 1) - count_distinct_single_occurence++; - count+= elem_cnt; + counters.next(elem, elem_cnt); + ulonglong count= counters.get_count(); if (curr_bucket == hist_width) return 0; if (count > bucket_capacity * (curr_bucket + 1)) { column->store_field_value((uchar *) elem, col_length); histogram->set_value(curr_bucket, - column->pos_in_interval(min_value, max_value)); + column->pos_in_interval(min_value, max_value)); curr_bucket++; while (curr_bucket != hist_width && count > bucket_capacity * (curr_bucket + 1)) @@ -1578,25 +1633,51 @@ public: } return 0; } + void finalize() override {} }; +Histogram_builder *Histogram_binary::create_builder(Field *col, uint col_len, + ha_rows rows) +{ + return new Histogram_binary_builder(col, col_len, rows); +} + + +Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type, + THD *owner) +{ + Histogram_base *res= NULL; + switch (hist_type) { + case SINGLE_PREC_HB: + case DOUBLE_PREC_HB: + res= new Histogram_binary(); + break; + case JSON_HB: + res= new Histogram_json_hb(); + break; + default: + DBUG_ASSERT(0); + } + + if (res) + res->set_owner(owner); + return res; +} + + C_MODE_START -int histogram_build_walk(void *elem, element_count elem_cnt, void *arg) +static int histogram_build_walk(void *elem, element_count elem_cnt, void *arg) { Histogram_builder *hist_builder= (Histogram_builder *) arg; return hist_builder->next(elem, elem_cnt); } - - -static int count_distinct_single_occurence_walk(void *elem, - element_count count, void *arg) 
+int basic_stats_collector_walk(void *elem, element_count count, + void *arg) { - ((ulonglong*)arg)[0]+= 1; - if (count == 1) - ((ulonglong*)arg)[1]+= 1; + ((Basic_stats_collector*)arg)->next(elem, count); return 0; } @@ -1681,23 +1762,35 @@ public: */ void walk_tree() { - ulonglong counts[2] = {0, 0}; - tree->walk(table_field->table, - count_distinct_single_occurence_walk, counts); - distincts= counts[0]; - distincts_single_occurence= counts[1]; + Basic_stats_collector stats_collector; + tree->walk(table_field->table, basic_stats_collector_walk, + (void*)&stats_collector ); + distincts= stats_collector.get_count_distinct(); + distincts_single_occurence= stats_collector.get_count_single_occurence(); } /* @brief Calculate a histogram of the tree */ - void walk_tree_with_histogram(ha_rows rows) + bool walk_tree_with_histogram(ha_rows rows) { - Histogram_builder hist_builder(table_field, tree_key_length, rows); - tree->walk(table_field->table, histogram_build_walk, (void *) &hist_builder); - distincts= hist_builder.get_count_distinct(); - distincts_single_occurence= hist_builder.get_count_single_occurence(); + Histogram_base *hist= table_field->collected_stats->histogram; + Histogram_builder *hist_builder= + hist->create_builder(table_field, tree_key_length, rows); + + if (tree->walk(table_field->table, histogram_build_walk, + (void*)hist_builder)) + { + delete hist_builder; + return true; // Error + } + hist_builder->finalize(); + distincts= hist_builder->counters.get_count_distinct(); + distincts_single_occurence= hist_builder->counters. 
+ get_count_single_occurence(); + delete hist_builder; + return false; } ulonglong get_count_distinct() @@ -1712,20 +1805,11 @@ public: /* @brief - Get the size of the histogram in bytes built for table_field - */ - uint get_hist_size() - { - return table_field->collected_stats->histogram.get_size(); - } - - /* - @brief Get the pointer to the histogram built for table_field */ - uchar *get_histogram() + Histogram_base *get_histogram() { - return table_field->collected_stats->histogram.get_values(); + return table_field->collected_stats->histogram; } }; @@ -2125,26 +2209,13 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) ulonglong *idx_avg_frequency= (ulonglong*) alloc_root(&table->mem_root, sizeof(ulonglong) * key_parts); - uint hist_size= thd->variables.histogram_size; - Histogram_type hist_type= (Histogram_type) (thd->variables.histogram_type); - uchar *histogram= NULL; - if (hist_size > 0) - { - if ((histogram= (uchar *) alloc_root(&table->mem_root, - hist_size * columns))) - bzero(histogram, hist_size * columns); - - } - - if (!table_stats || !column_stats || !index_stats || !idx_avg_frequency || - (hist_size && !histogram)) + if (!table_stats || !column_stats || !index_stats || !idx_avg_frequency) DBUG_RETURN(1); table->collected_stats= table_stats; table_stats->column_stats= column_stats; table_stats->index_stats= index_stats; table_stats->idx_avg_frequency= idx_avg_frequency; - table_stats->histograms= histogram; memset(column_stats, 0, sizeof(Column_statistics) * columns); @@ -2152,10 +2223,7 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) { if (bitmap_is_set(table->read_set, (*field_ptr)->field_index)) { - column_stats->histogram.set_size(hist_size); - column_stats->histogram.set_type(hist_type); - column_stats->histogram.set_values(histogram); - histogram+= hist_size; + column_stats->histogram = NULL; (*field_ptr)->collected_stats= column_stats++; } } @@ -2177,6 +2245,25 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) 
DBUG_RETURN(0); } +/* + Free the "local" statistics for table. + We only free the statistics that is not on MEM_ROOT and needs to be + explicitly freed. +*/ +void free_statistics_for_table(THD *thd, TABLE *table) +{ + for (Field **field_ptr= table->field; *field_ptr; field_ptr++) + { + // Only delete the histograms that are exclusivly owned by this thread + if ((*field_ptr)->collected_stats && + (*field_ptr)->collected_stats->histogram && + (*field_ptr)->collected_stats->histogram->get_owner() == thd) + { + delete (*field_ptr)->collected_stats->histogram; + (*field_ptr)->collected_stats->histogram= NULL; + } + } +} /** @brief @@ -2383,7 +2470,8 @@ bool Column_statistics_collected::add() */ inline -void Column_statistics_collected::finish(ha_rows rows, double sample_fraction) +bool Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, + double sample_fraction) { double val; @@ -2401,13 +2489,32 @@ void Column_statistics_collected::finish(ha_rows rows, double sample_fraction) } if (count_distinct) { - uint hist_size= count_distinct->get_hist_size(); + uint hist_size= current_thd->variables.histogram_size; + Histogram_type hist_type= + (Histogram_type) (current_thd->variables.histogram_type); + bool have_histogram= false; + if (hist_size != 0 && hist_type != INVALID_HISTOGRAM) + { + have_histogram= true; + histogram= create_histogram(mem_root, hist_type, current_thd); + histogram->init_for_collection(mem_root, hist_type, hist_size); + } /* Compute cardinality statistics and optionally histogram. 
*/ - if (hist_size == 0) + if (!have_histogram) count_distinct->walk_tree(); else - count_distinct->walk_tree_with_histogram(rows - nulls); + { + if (count_distinct->walk_tree_with_histogram(rows - nulls)) + { + delete histogram; + histogram= NULL; + + delete count_distinct; + count_distinct= NULL; + return true; // Error + } + } ulonglong distincts= count_distinct->get_count_distinct(); ulonglong distincts_single_occurence= @@ -2442,15 +2549,14 @@ void Column_statistics_collected::finish(ha_rows rows, double sample_fraction) set_not_null(COLUMN_STAT_AVG_FREQUENCY); } else - hist_size= 0; - histogram.set_size(hist_size); + have_histogram= false; + set_not_null(COLUMN_STAT_HIST_SIZE); - if (hist_size && distincts) + if (have_histogram && distincts && histogram) { set_not_null(COLUMN_STAT_HIST_TYPE); - histogram.set_values(count_distinct->get_histogram()); set_not_null(COLUMN_STAT_HISTOGRAM); - } + } delete count_distinct; count_distinct= NULL; } @@ -2459,7 +2565,8 @@ void Column_statistics_collected::finish(ha_rows rows, double sample_fraction) val= 1.0; set_avg_frequency(val); set_not_null(COLUMN_STAT_AVG_FREQUENCY); - } + } + return false; } @@ -2710,7 +2817,10 @@ int collect_statistics_for_table(THD *thd, TABLE *table) continue; bitmap_set_bit(table->write_set, table_field->field_index); if (!rc) - table_field->collected_stats->finish(rows, sample_fraction); + { + rc= table_field->collected_stats->finish(&table->mem_root, rows, + sample_fraction); + } else table_field->collected_stats->cleanup(); } @@ -2790,7 +2900,7 @@ int update_statistics_for_table(THD *thd, TABLE *table) start_new_trans new_trans(thd); - if (open_stat_tables(thd, tables, TRUE)) + if ((open_stat_tables(thd, tables, TRUE))) DBUG_RETURN(rc); save_binlog_format= thd->set_current_stmt_binlog_format_stmt(); @@ -2916,16 +3026,17 @@ int read_statistics_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables) /* Read statistics from the statistical table column_stats */ stat_table= 
stat_tables[COLUMN_STAT].table; - ulong total_hist_size= 0; + bool have_histograms= false; Column_stat column_stat(stat_table, table); for (field_ptr= table_share->field; *field_ptr; field_ptr++) { table_field= *field_ptr; column_stat.set_key_fields(table_field); column_stat.get_stat_values(); - total_hist_size+= table_field->read_stats->histogram.get_size(); + if (table_field->read_stats->histogram_type_on_disk != INVALID_HISTOGRAM) + have_histograms= true; } - table_share->stats_cb.total_hist_size= total_hist_size; + table_share->stats_cb.have_histograms= have_histograms; /* Read statistics from the statistical table index_stats */ stat_table= stat_tables[INDEX_STAT].table; @@ -3021,6 +3132,9 @@ void delete_stat_values_for_table_share(TABLE_SHARE *table_share) delete column_stats->max_value; column_stats->max_value= NULL; } + + delete column_stats->histogram; + column_stats->histogram=NULL; } } @@ -3065,28 +3179,28 @@ int read_histograms_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables) if (stats_cb->start_histograms_load()) { - uchar *histogram= (uchar *) alloc_root(&stats_cb->mem_root, - stats_cb->total_hist_size); - if (!histogram) - { - stats_cb->abort_histograms_load(); - DBUG_RETURN(1); - } - memset(histogram, 0, stats_cb->total_hist_size); - Column_stat column_stat(stat_tables[COLUMN_STAT].table, table); + + /* + The process of histogram loading makes use of the field it is for. Mark + all fields as readable/writable in order to allow that. 
+ */ + MY_BITMAP *old_sets[2]; + dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set); + for (Field **field_ptr= table->s->field; *field_ptr; field_ptr++) { Field *table_field= *field_ptr; - if (uint hist_size= table_field->read_stats->histogram.get_size()) + if (table_field->read_stats->histogram_type_on_disk != INVALID_HISTOGRAM) { column_stat.set_key_fields(table_field); - table_field->read_stats->histogram.set_values(histogram); - column_stat.get_histogram_value(); - histogram+= hist_size; + table_field->read_stats->histogram= + column_stat.load_histogram(&stats_cb->mem_root); } } stats_cb->end_histograms_load(); + + dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets); } table->histograms_are_read= true; DBUG_RETURN(0); @@ -3775,15 +3889,11 @@ double get_column_range_cardinality(Field *field, if (avg_frequency > 1.0 + 0.000001 && col_stats->min_max_values_are_provided()) { - Histogram *hist= &col_stats->histogram; - if (hist->is_usable(thd)) + Histogram_base *hist = col_stats->histogram; + if (hist && hist->is_usable(thd)) { - store_key_image_to_rec(field, (uchar *) min_endp->key, - field->key_length()); - double pos= field->pos_in_interval(col_stats->min_value, - col_stats->max_value); res= col_non_nulls * - hist->point_selectivity(pos, + hist->point_selectivity(field, min_endp, avg_frequency / col_non_nulls); } } @@ -3798,34 +3908,41 @@ double get_column_range_cardinality(Field *field, { if (col_stats->min_max_values_are_provided()) { - double sel, min_mp_pos, max_mp_pos; - - if (min_endp && !(field->null_ptr && min_endp->key[0])) + Histogram_base *hist= col_stats->histogram; + double avg_frequency= col_stats->get_avg_frequency(); + double sel; + if (hist && hist->is_usable(thd)) { - store_key_image_to_rec(field, (uchar *) min_endp->key, - field->key_length()); - min_mp_pos= field->pos_in_interval(col_stats->min_value, - col_stats->max_value); + sel= hist->range_selectivity(field, min_endp, max_endp, + 
avg_frequency / col_non_nulls); + res= col_non_nulls * sel; } else - min_mp_pos= 0.0; - if (max_endp) { - store_key_image_to_rec(field, (uchar *) max_endp->key, - field->key_length()); - max_mp_pos= field->pos_in_interval(col_stats->min_value, - col_stats->max_value); - } - else - max_mp_pos= 1.0; + double min_mp_pos, max_mp_pos; + if (min_endp && !(field->null_ptr && min_endp->key[0])) + { + store_key_image_to_rec(field, (uchar *) min_endp->key, + field->key_length()); + min_mp_pos= + field->pos_in_interval(col_stats->min_value, col_stats->max_value); + } + else + min_mp_pos= 0.0; + if (max_endp) + { + store_key_image_to_rec(field, (uchar *) max_endp->key, + field->key_length()); + max_mp_pos= + field->pos_in_interval(col_stats->min_value, col_stats->max_value); + } + else + max_mp_pos= 1.0; - Histogram *hist= &col_stats->histogram; - if (hist->is_usable(thd)) - sel= hist->range_selectivity(min_mp_pos, max_mp_pos); - else - sel= (max_mp_pos - min_mp_pos); - res= col_non_nulls * sel; - set_if_bigger(res, col_stats->get_avg_frequency()); + sel = (max_mp_pos - min_mp_pos); + res= col_non_nulls * sel; + set_if_bigger(res, avg_frequency); + } } else res= col_non_nulls; @@ -3835,13 +3952,13 @@ double get_column_range_cardinality(Field *field, return res; } - - /* Estimate selectivity of "col=const" using a histogram - @param pos Position of the "const" between column's min_value and - max_value. This is a number in [0..1] range. + @param field the field to estimate its selectivity. + + @param endpoint The constant + @param avg_sel Average selectivity of condition "col=const" in this table. It is calcuated as (#non_null_values / #distinct_values). @@ -3870,9 +3987,15 @@ double get_column_range_cardinality(Field *field, value. 
*/ -double Histogram::point_selectivity(double pos, double avg_sel) +double Histogram_binary::point_selectivity(Field *field, key_range *endpoint, + double avg_sel) { double sel; + Column_statistics *col_stats= field->read_stats; + store_key_image_to_rec(field, (uchar *) endpoint->key, + field->key_length()); + double pos= field->pos_in_interval(col_stats->min_value, + col_stats->max_value); /* Find the bucket that contains the value 'pos'. */ uint min= find_bucket(pos, TRUE); uint pos_value= (uint) (pos * prec_factor()); @@ -3906,7 +4029,7 @@ double Histogram::point_selectivity(double pos, double avg_sel) /* The value 'pos' fits within one single histogram bucket. - Histogram buckets have the same numbers of rows, but they cover + Histogram_binary buckets have the same numbers of rows, but they cover different ranges of values. We assume that values are uniformly distributed across the [0..1] value @@ -3951,6 +4074,43 @@ double Histogram::point_selectivity(double pos, double avg_sel) return sel; } + +double Histogram_binary::range_selectivity(Field *field, + key_range *min_endp, + key_range *max_endp, + double avg_sel) +{ + double sel, min_mp_pos, max_mp_pos; + Column_statistics *col_stats= field->read_stats; + + if (min_endp && !(field->null_ptr && min_endp->key[0])) + { + store_key_image_to_rec(field, (uchar *) min_endp->key, + field->key_length()); + min_mp_pos= + field->pos_in_interval(col_stats->min_value, col_stats->max_value); + } + else + min_mp_pos= 0.0; + if (max_endp) + { + store_key_image_to_rec(field, (uchar *) max_endp->key, + field->key_length()); + max_mp_pos= + field->pos_in_interval(col_stats->min_value, col_stats->max_value); + } + else + max_mp_pos= 1.0; + + double bucket_sel= 1.0 / (get_width() + 1); + uint min= find_bucket(min_mp_pos, TRUE); + uint max= find_bucket(max_mp_pos, FALSE); + sel= bucket_sel * (max - min + 1); + + set_if_bigger(sel, avg_sel); + return sel; +} + /* Check whether the table is one of the persistent statistical tables. 
*/ |