summaryrefslogtreecommitdiff
path: root/sql/sql_statistics.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/sql_statistics.cc')
-rw-r--r--sql/sql_statistics.cc490
1 files changed, 325 insertions, 165 deletions
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 1f034f490c8..84d0902193b 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -28,11 +28,15 @@
#include "sql_base.h"
#include "key.h"
#include "sql_statistics.h"
+#include "opt_histogram_json.h"
#include "opt_range.h"
#include "uniques.h"
#include "sql_show.h"
#include "sql_partition.h"
+#include <vector>
+#include <string>
+
/*
The system variable 'use_stat_tables' can take one of the
following values:
@@ -57,8 +61,11 @@
the collected statistics in the persistent statistical tables only
when the value of the variable 'use_stat_tables' is not
equal to "never".
-*/
-
+*/
+
+Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type,
+ THD *owner);
+
/* Currently there are only 3 persistent statistical tables */
static const uint STATISTICS_TABLES= 3;
@@ -178,12 +185,12 @@ TABLE_FIELD_TYPE column_stat_fields[COLUMN_STAT_N_FIELDS] =
},
{
{ STRING_WITH_LEN("hist_type") },
- { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB')") },
+ { STRING_WITH_LEN("enum('SINGLE_PREC_HB','DOUBLE_PREC_HB','JSON_HB')") },
{ STRING_WITH_LEN("utf8mb3") }
},
{
{ STRING_WITH_LEN("histogram") },
- { STRING_WITH_LEN("varbinary(255)") },
+ { STRING_WITH_LEN("longblob") },
{ NULL, 0 }
}
};
@@ -307,7 +314,7 @@ public:
inline void init(THD *thd, Field * table_field);
inline bool add();
- inline void finish(ha_rows rows, double sample_fraction);
+ inline bool finish(MEM_ROOT *mem_root, ha_rows rows, double sample_fraction);
inline void cleanup();
};
@@ -1064,15 +1071,23 @@ public:
stat_field->store(stats->get_avg_frequency());
break;
case COLUMN_STAT_HIST_SIZE:
- stat_field->store(stats->histogram.get_size());
+ // Note: this is dumb. the histogram size is stored with the
+ // histogram!
+ stat_field->store(stats->histogram?
+ stats->histogram->get_size() : 0);
break;
case COLUMN_STAT_HIST_TYPE:
- stat_field->store(stats->histogram.get_type() + 1);
+ if (stats->histogram)
+ stat_field->store(stats->histogram->get_type() + 1);
+ else
+ stat_field->set_null();
break;
case COLUMN_STAT_HISTOGRAM:
- stat_field->store((char *)stats->histogram.get_values(),
- stats->histogram.get_size(), &my_charset_bin);
- break;
+ if (stats->histogram)
+ stats->histogram->serialize(stat_field);
+ else
+ stat_field->set_null();
+ break;
}
}
}
@@ -1100,6 +1115,8 @@ public:
void get_stat_values()
{
table_field->read_stats->set_all_nulls();
+ // default: hist_type=NULL means there's no histogram
+ table_field->read_stats->histogram_type_on_disk= INVALID_HISTOGRAM;
if (table_field->read_stats->min_value)
table_field->read_stats->min_value->set_null();
@@ -1111,7 +1128,7 @@ public:
char buff[MAX_FIELD_WIDTH];
String val(buff, sizeof(buff), &my_charset_bin);
- for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HIST_TYPE; i++)
+ for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HISTOGRAM; i++)
{
Field *stat_field= stat_table->field[i];
@@ -1155,13 +1172,28 @@ public:
table_field->read_stats->set_avg_frequency(stat_field->val_real());
break;
case COLUMN_STAT_HIST_SIZE:
- table_field->read_stats->histogram.set_size(stat_field->val_int());
+ /*
+ Ignore the contents of mysql.column_stats.hist_size. We take the
+ size from the mysql.column_stats.histogram column, itself.
+ */
break;
case COLUMN_STAT_HIST_TYPE:
- Histogram_type hist_type= (Histogram_type) (stat_field->val_int() -
- 1);
- table_field->read_stats->histogram.set_type(hist_type);
- break;
+ {
+ /*
+ Save the histogram type. The histogram itself will be read in
+ read_histograms_for_table().
+ */
+ Histogram_type hist_type= (Histogram_type) (stat_field->val_int() -
+ 1);
+ table_field->read_stats->histogram_type_on_disk= hist_type;
+ break;
+ }
+ case COLUMN_STAT_HISTOGRAM:
+ /*
+ Do nothing here: we take the histogram length from the 'histogram'
+ column itself
+ */
+ break;
}
}
}
@@ -1182,9 +1214,9 @@ public:
The method assumes that the value of histogram size and the pointer to
the histogram location has been already set in the fields size and values
of read_stats->histogram.
- */
+ */
- void get_histogram_value()
+ Histogram_base * load_histogram(MEM_ROOT *mem_root)
{
if (find_stat())
{
@@ -1194,14 +1226,60 @@ public:
Field *stat_field= stat_table->field[fldno];
table_field->read_stats->set_not_null(fldno);
stat_field->val_str(&val);
- memcpy(table_field->read_stats->histogram.get_values(),
- val.ptr(), table_field->read_stats->histogram.get_size());
+ Histogram_type hist_type=
+ table_field->read_stats->histogram_type_on_disk;
+
+ Histogram_base *hist;
+ if (!(hist= create_histogram(mem_root, hist_type, NULL)))
+ return NULL;
+ Field *field= table->field[table_field->field_index];
+ if (!hist->parse(mem_root, db_name->str, table_name->str,
+ field, hist_type,
+ val.ptr(), val.length()))
+ {
+ table_field->read_stats->histogram= hist;
+ return hist;
+ }
+ else
+ delete hist;
}
+ return NULL;
}
-
};
+bool Histogram_binary::parse(MEM_ROOT *mem_root, const char*, const char*,
+ Field*, Histogram_type type_arg,
+ const char *hist_data, size_t hist_data_len)
+{
+ /* On-disk an in-memory formats are the same. Just copy the data. */
+ type= type_arg;
+ size= (uint8) hist_data_len; // 'size' holds the size of histogram in bytes
+ if (!(values= (uchar*)alloc_root(mem_root, hist_data_len)))
+ return true;
+
+ memcpy(values, hist_data, hist_data_len);
+ return false;
+}
+
+/*
+ Save the histogram data info a table field.
+*/
+void Histogram_binary::serialize(Field *field)
+{
+ field->store((char*)values, size, &my_charset_bin);
+}
+
+void Histogram_binary::init_for_collection(MEM_ROOT *mem_root,
+ Histogram_type htype_arg,
+ ulonglong size_arg)
+{
+ type= htype_arg;
+ values= (uchar*)alloc_root(mem_root, (size_t)size_arg);
+ size= (uint8) size_arg;
+}
+
+
/*
An object of the class Index_stat is created to read statistical
data on tables from the statistical table table_stat, to update
@@ -1512,62 +1590,39 @@ public:
}
};
-/*
- Histogram_builder is a helper class that is used to build histograms
- for columns
-*/
-
-class Histogram_builder
+class Histogram_binary_builder : public Histogram_builder
{
- Field *column; /* table field for which the histogram is built */
- uint col_length; /* size of this field */
- ha_rows records; /* number of records the histogram is built for */
Field *min_value; /* pointer to the minimal value for the field */
Field *max_value; /* pointer to the maximal value for the field */
- Histogram *histogram; /* the histogram location */
+ Histogram_binary *histogram; /* the histogram location */
uint hist_width; /* the number of points in the histogram */
double bucket_capacity; /* number of rows in a bucket of the histogram */
uint curr_bucket; /* number of the current bucket to be built */
- ulonglong count; /* number of values retrieved */
- ulonglong count_distinct; /* number of distinct values retrieved */
- /* number of distinct values that occured only once */
- ulonglong count_distinct_single_occurence;
-public:
- Histogram_builder(Field *col, uint col_len, ha_rows rows)
- : column(col), col_length(col_len), records(rows)
+public:
+ Histogram_binary_builder(Field *col, uint col_len, ha_rows rows)
+ : Histogram_builder(col, col_len, rows)
{
Column_statistics *col_stats= col->collected_stats;
min_value= col_stats->min_value;
max_value= col_stats->max_value;
- histogram= &col_stats->histogram;
+ histogram= (Histogram_binary*)col_stats->histogram;
hist_width= histogram->get_width();
bucket_capacity= (double) records / (hist_width + 1);
curr_bucket= 0;
- count= 0;
- count_distinct= 0;
- count_distinct_single_occurence= 0;
}
- ulonglong get_count_distinct() const { return count_distinct; }
- ulonglong get_count_single_occurence() const
+ int next(void *elem, element_count elem_cnt) override
{
- return count_distinct_single_occurence;
- }
-
- int next(void *elem, element_count elem_cnt)
- {
- count_distinct++;
- if (elem_cnt == 1)
- count_distinct_single_occurence++;
- count+= elem_cnt;
+ counters.next(elem, elem_cnt);
+ ulonglong count= counters.get_count();
if (curr_bucket == hist_width)
return 0;
if (count > bucket_capacity * (curr_bucket + 1))
{
column->store_field_value((uchar *) elem, col_length);
histogram->set_value(curr_bucket,
- column->pos_in_interval(min_value, max_value));
+ column->pos_in_interval(min_value, max_value));
curr_bucket++;
while (curr_bucket != hist_width &&
count > bucket_capacity * (curr_bucket + 1))
@@ -1578,25 +1633,51 @@ public:
}
return 0;
}
+ void finalize() override {}
};
+Histogram_builder *Histogram_binary::create_builder(Field *col, uint col_len,
+ ha_rows rows)
+{
+ return new Histogram_binary_builder(col, col_len, rows);
+}
+
+
+Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type,
+ THD *owner)
+{
+ Histogram_base *res= NULL;
+ switch (hist_type) {
+ case SINGLE_PREC_HB:
+ case DOUBLE_PREC_HB:
+ res= new Histogram_binary();
+ break;
+ case JSON_HB:
+ res= new Histogram_json_hb();
+ break;
+ default:
+ DBUG_ASSERT(0);
+ }
+
+ if (res)
+ res->set_owner(owner);
+ return res;
+}
+
+
C_MODE_START
-int histogram_build_walk(void *elem, element_count elem_cnt, void *arg)
+static int histogram_build_walk(void *elem, element_count elem_cnt, void *arg)
{
Histogram_builder *hist_builder= (Histogram_builder *) arg;
return hist_builder->next(elem, elem_cnt);
}
-
-
-static int count_distinct_single_occurence_walk(void *elem,
- element_count count, void *arg)
+int basic_stats_collector_walk(void *elem, element_count count,
+ void *arg)
{
- ((ulonglong*)arg)[0]+= 1;
- if (count == 1)
- ((ulonglong*)arg)[1]+= 1;
+ ((Basic_stats_collector*)arg)->next(elem, count);
return 0;
}
@@ -1681,23 +1762,35 @@ public:
*/
void walk_tree()
{
- ulonglong counts[2] = {0, 0};
- tree->walk(table_field->table,
- count_distinct_single_occurence_walk, counts);
- distincts= counts[0];
- distincts_single_occurence= counts[1];
+ Basic_stats_collector stats_collector;
+ tree->walk(table_field->table, basic_stats_collector_walk,
+ (void*)&stats_collector );
+ distincts= stats_collector.get_count_distinct();
+ distincts_single_occurence= stats_collector.get_count_single_occurence();
}
/*
@brief
Calculate a histogram of the tree
*/
- void walk_tree_with_histogram(ha_rows rows)
+ bool walk_tree_with_histogram(ha_rows rows)
{
- Histogram_builder hist_builder(table_field, tree_key_length, rows);
- tree->walk(table_field->table, histogram_build_walk, (void *) &hist_builder);
- distincts= hist_builder.get_count_distinct();
- distincts_single_occurence= hist_builder.get_count_single_occurence();
+ Histogram_base *hist= table_field->collected_stats->histogram;
+ Histogram_builder *hist_builder=
+ hist->create_builder(table_field, tree_key_length, rows);
+
+ if (tree->walk(table_field->table, histogram_build_walk,
+ (void*)hist_builder))
+ {
+ delete hist_builder;
+ return true; // Error
+ }
+ hist_builder->finalize();
+ distincts= hist_builder->counters.get_count_distinct();
+ distincts_single_occurence= hist_builder->counters.
+ get_count_single_occurence();
+ delete hist_builder;
+ return false;
}
ulonglong get_count_distinct()
@@ -1712,20 +1805,11 @@ public:
/*
@brief
- Get the size of the histogram in bytes built for table_field
- */
- uint get_hist_size()
- {
- return table_field->collected_stats->histogram.get_size();
- }
-
- /*
- @brief
Get the pointer to the histogram built for table_field
*/
- uchar *get_histogram()
+ Histogram_base *get_histogram()
{
- return table_field->collected_stats->histogram.get_values();
+ return table_field->collected_stats->histogram;
}
};
@@ -2125,26 +2209,13 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
ulonglong *idx_avg_frequency= (ulonglong*) alloc_root(&table->mem_root,
sizeof(ulonglong) * key_parts);
- uint hist_size= thd->variables.histogram_size;
- Histogram_type hist_type= (Histogram_type) (thd->variables.histogram_type);
- uchar *histogram= NULL;
- if (hist_size > 0)
- {
- if ((histogram= (uchar *) alloc_root(&table->mem_root,
- hist_size * columns)))
- bzero(histogram, hist_size * columns);
-
- }
-
- if (!table_stats || !column_stats || !index_stats || !idx_avg_frequency ||
- (hist_size && !histogram))
+ if (!table_stats || !column_stats || !index_stats || !idx_avg_frequency)
DBUG_RETURN(1);
table->collected_stats= table_stats;
table_stats->column_stats= column_stats;
table_stats->index_stats= index_stats;
table_stats->idx_avg_frequency= idx_avg_frequency;
- table_stats->histograms= histogram;
memset(column_stats, 0, sizeof(Column_statistics) * columns);
@@ -2152,10 +2223,7 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
{
if (bitmap_is_set(table->read_set, (*field_ptr)->field_index))
{
- column_stats->histogram.set_size(hist_size);
- column_stats->histogram.set_type(hist_type);
- column_stats->histogram.set_values(histogram);
- histogram+= hist_size;
+ column_stats->histogram = NULL;
(*field_ptr)->collected_stats= column_stats++;
}
}
@@ -2177,6 +2245,25 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
DBUG_RETURN(0);
}
+/*
+ Free the "local" statistics for table.
+ We only free the statistics that is not on MEM_ROOT and needs to be
+ explicitly freed.
+*/
+void free_statistics_for_table(THD *thd, TABLE *table)
+{
+ for (Field **field_ptr= table->field; *field_ptr; field_ptr++)
+ {
+ // Only delete the histograms that are exclusivly owned by this thread
+ if ((*field_ptr)->collected_stats &&
+ (*field_ptr)->collected_stats->histogram &&
+ (*field_ptr)->collected_stats->histogram->get_owner() == thd)
+ {
+ delete (*field_ptr)->collected_stats->histogram;
+ (*field_ptr)->collected_stats->histogram= NULL;
+ }
+ }
+}
/**
@brief
@@ -2383,7 +2470,8 @@ bool Column_statistics_collected::add()
*/
inline
-void Column_statistics_collected::finish(ha_rows rows, double sample_fraction)
+bool Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows,
+ double sample_fraction)
{
double val;
@@ -2401,13 +2489,32 @@ void Column_statistics_collected::finish(ha_rows rows, double sample_fraction)
}
if (count_distinct)
{
- uint hist_size= count_distinct->get_hist_size();
+ uint hist_size= current_thd->variables.histogram_size;
+ Histogram_type hist_type=
+ (Histogram_type) (current_thd->variables.histogram_type);
+ bool have_histogram= false;
+ if (hist_size != 0 && hist_type != INVALID_HISTOGRAM)
+ {
+ have_histogram= true;
+ histogram= create_histogram(mem_root, hist_type, current_thd);
+ histogram->init_for_collection(mem_root, hist_type, hist_size);
+ }
/* Compute cardinality statistics and optionally histogram. */
- if (hist_size == 0)
+ if (!have_histogram)
count_distinct->walk_tree();
else
- count_distinct->walk_tree_with_histogram(rows - nulls);
+ {
+ if (count_distinct->walk_tree_with_histogram(rows - nulls))
+ {
+ delete histogram;
+ histogram= NULL;
+
+ delete count_distinct;
+ count_distinct= NULL;
+ return true; // Error
+ }
+ }
ulonglong distincts= count_distinct->get_count_distinct();
ulonglong distincts_single_occurence=
@@ -2442,15 +2549,14 @@ void Column_statistics_collected::finish(ha_rows rows, double sample_fraction)
set_not_null(COLUMN_STAT_AVG_FREQUENCY);
}
else
- hist_size= 0;
- histogram.set_size(hist_size);
+ have_histogram= false;
+
set_not_null(COLUMN_STAT_HIST_SIZE);
- if (hist_size && distincts)
+ if (have_histogram && distincts && histogram)
{
set_not_null(COLUMN_STAT_HIST_TYPE);
- histogram.set_values(count_distinct->get_histogram());
set_not_null(COLUMN_STAT_HISTOGRAM);
- }
+ }
delete count_distinct;
count_distinct= NULL;
}
@@ -2459,7 +2565,8 @@ void Column_statistics_collected::finish(ha_rows rows, double sample_fraction)
val= 1.0;
set_avg_frequency(val);
set_not_null(COLUMN_STAT_AVG_FREQUENCY);
- }
+ }
+ return false;
}
@@ -2710,7 +2817,10 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
continue;
bitmap_set_bit(table->write_set, table_field->field_index);
if (!rc)
- table_field->collected_stats->finish(rows, sample_fraction);
+ {
+ rc= table_field->collected_stats->finish(&table->mem_root, rows,
+ sample_fraction);
+ }
else
table_field->collected_stats->cleanup();
}
@@ -2790,7 +2900,7 @@ int update_statistics_for_table(THD *thd, TABLE *table)
start_new_trans new_trans(thd);
- if (open_stat_tables(thd, tables, TRUE))
+ if ((open_stat_tables(thd, tables, TRUE)))
DBUG_RETURN(rc);
save_binlog_format= thd->set_current_stmt_binlog_format_stmt();
@@ -2916,16 +3026,17 @@ int read_statistics_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables)
/* Read statistics from the statistical table column_stats */
stat_table= stat_tables[COLUMN_STAT].table;
- ulong total_hist_size= 0;
+ bool have_histograms= false;
Column_stat column_stat(stat_table, table);
for (field_ptr= table_share->field; *field_ptr; field_ptr++)
{
table_field= *field_ptr;
column_stat.set_key_fields(table_field);
column_stat.get_stat_values();
- total_hist_size+= table_field->read_stats->histogram.get_size();
+ if (table_field->read_stats->histogram_type_on_disk != INVALID_HISTOGRAM)
+ have_histograms= true;
}
- table_share->stats_cb.total_hist_size= total_hist_size;
+ table_share->stats_cb.have_histograms= have_histograms;
/* Read statistics from the statistical table index_stats */
stat_table= stat_tables[INDEX_STAT].table;
@@ -3021,6 +3132,9 @@ void delete_stat_values_for_table_share(TABLE_SHARE *table_share)
delete column_stats->max_value;
column_stats->max_value= NULL;
}
+
+ delete column_stats->histogram;
+ column_stats->histogram=NULL;
}
}
@@ -3065,28 +3179,28 @@ int read_histograms_for_table(THD *thd, TABLE *table, TABLE_LIST *stat_tables)
if (stats_cb->start_histograms_load())
{
- uchar *histogram= (uchar *) alloc_root(&stats_cb->mem_root,
- stats_cb->total_hist_size);
- if (!histogram)
- {
- stats_cb->abort_histograms_load();
- DBUG_RETURN(1);
- }
- memset(histogram, 0, stats_cb->total_hist_size);
-
Column_stat column_stat(stat_tables[COLUMN_STAT].table, table);
+
+ /*
+ The process of histogram loading makes use of the field it is for. Mark
+ all fields as readable/writable in order to allow that.
+ */
+ MY_BITMAP *old_sets[2];
+ dbug_tmp_use_all_columns(table, old_sets, &table->read_set, &table->write_set);
+
for (Field **field_ptr= table->s->field; *field_ptr; field_ptr++)
{
Field *table_field= *field_ptr;
- if (uint hist_size= table_field->read_stats->histogram.get_size())
+ if (table_field->read_stats->histogram_type_on_disk != INVALID_HISTOGRAM)
{
column_stat.set_key_fields(table_field);
- table_field->read_stats->histogram.set_values(histogram);
- column_stat.get_histogram_value();
- histogram+= hist_size;
+ table_field->read_stats->histogram=
+ column_stat.load_histogram(&stats_cb->mem_root);
}
}
stats_cb->end_histograms_load();
+
+ dbug_tmp_restore_column_maps(&table->read_set, &table->write_set, old_sets);
}
table->histograms_are_read= true;
DBUG_RETURN(0);
@@ -3775,15 +3889,11 @@ double get_column_range_cardinality(Field *field,
if (avg_frequency > 1.0 + 0.000001 &&
col_stats->min_max_values_are_provided())
{
- Histogram *hist= &col_stats->histogram;
- if (hist->is_usable(thd))
+ Histogram_base *hist = col_stats->histogram;
+ if (hist && hist->is_usable(thd))
{
- store_key_image_to_rec(field, (uchar *) min_endp->key,
- field->key_length());
- double pos= field->pos_in_interval(col_stats->min_value,
- col_stats->max_value);
res= col_non_nulls *
- hist->point_selectivity(pos,
+ hist->point_selectivity(field, min_endp,
avg_frequency / col_non_nulls);
}
}
@@ -3798,34 +3908,41 @@ double get_column_range_cardinality(Field *field,
{
if (col_stats->min_max_values_are_provided())
{
- double sel, min_mp_pos, max_mp_pos;
-
- if (min_endp && !(field->null_ptr && min_endp->key[0]))
+ Histogram_base *hist= col_stats->histogram;
+ double avg_frequency= col_stats->get_avg_frequency();
+ double sel;
+ if (hist && hist->is_usable(thd))
{
- store_key_image_to_rec(field, (uchar *) min_endp->key,
- field->key_length());
- min_mp_pos= field->pos_in_interval(col_stats->min_value,
- col_stats->max_value);
+ sel= hist->range_selectivity(field, min_endp, max_endp,
+ avg_frequency / col_non_nulls);
+ res= col_non_nulls * sel;
}
else
- min_mp_pos= 0.0;
- if (max_endp)
{
- store_key_image_to_rec(field, (uchar *) max_endp->key,
- field->key_length());
- max_mp_pos= field->pos_in_interval(col_stats->min_value,
- col_stats->max_value);
- }
- else
- max_mp_pos= 1.0;
+ double min_mp_pos, max_mp_pos;
+ if (min_endp && !(field->null_ptr && min_endp->key[0]))
+ {
+ store_key_image_to_rec(field, (uchar *) min_endp->key,
+ field->key_length());
+ min_mp_pos=
+ field->pos_in_interval(col_stats->min_value, col_stats->max_value);
+ }
+ else
+ min_mp_pos= 0.0;
+ if (max_endp)
+ {
+ store_key_image_to_rec(field, (uchar *) max_endp->key,
+ field->key_length());
+ max_mp_pos=
+ field->pos_in_interval(col_stats->min_value, col_stats->max_value);
+ }
+ else
+ max_mp_pos= 1.0;
- Histogram *hist= &col_stats->histogram;
- if (hist->is_usable(thd))
- sel= hist->range_selectivity(min_mp_pos, max_mp_pos);
- else
- sel= (max_mp_pos - min_mp_pos);
- res= col_non_nulls * sel;
- set_if_bigger(res, col_stats->get_avg_frequency());
+ sel = (max_mp_pos - min_mp_pos);
+ res= col_non_nulls * sel;
+ set_if_bigger(res, avg_frequency);
+ }
}
else
res= col_non_nulls;
@@ -3835,13 +3952,13 @@ double get_column_range_cardinality(Field *field,
return res;
}
-
-
/*
Estimate selectivity of "col=const" using a histogram
- @param pos Position of the "const" between column's min_value and
- max_value. This is a number in [0..1] range.
+ @param field the field to estimate its selectivity.
+
+ @param endpoint The constant
+
@param avg_sel Average selectivity of condition "col=const" in this table.
It is calcuated as (#non_null_values / #distinct_values).
@@ -3870,9 +3987,15 @@ double get_column_range_cardinality(Field *field,
value.
*/
-double Histogram::point_selectivity(double pos, double avg_sel)
+double Histogram_binary::point_selectivity(Field *field, key_range *endpoint,
+ double avg_sel)
{
double sel;
+ Column_statistics *col_stats= field->read_stats;
+ store_key_image_to_rec(field, (uchar *) endpoint->key,
+ field->key_length());
+ double pos= field->pos_in_interval(col_stats->min_value,
+ col_stats->max_value);
/* Find the bucket that contains the value 'pos'. */
uint min= find_bucket(pos, TRUE);
uint pos_value= (uint) (pos * prec_factor());
@@ -3906,7 +4029,7 @@ double Histogram::point_selectivity(double pos, double avg_sel)
/*
The value 'pos' fits within one single histogram bucket.
- Histogram buckets have the same numbers of rows, but they cover
+ Histogram_binary buckets have the same numbers of rows, but they cover
different ranges of values.
We assume that values are uniformly distributed across the [0..1] value
@@ -3951,6 +4074,43 @@ double Histogram::point_selectivity(double pos, double avg_sel)
return sel;
}
+
+double Histogram_binary::range_selectivity(Field *field,
+ key_range *min_endp,
+ key_range *max_endp,
+ double avg_sel)
+{
+ double sel, min_mp_pos, max_mp_pos;
+ Column_statistics *col_stats= field->read_stats;
+
+ if (min_endp && !(field->null_ptr && min_endp->key[0]))
+ {
+ store_key_image_to_rec(field, (uchar *) min_endp->key,
+ field->key_length());
+ min_mp_pos=
+ field->pos_in_interval(col_stats->min_value, col_stats->max_value);
+ }
+ else
+ min_mp_pos= 0.0;
+ if (max_endp)
+ {
+ store_key_image_to_rec(field, (uchar *) max_endp->key,
+ field->key_length());
+ max_mp_pos=
+ field->pos_in_interval(col_stats->min_value, col_stats->max_value);
+ }
+ else
+ max_mp_pos= 1.0;
+
+ double bucket_sel= 1.0 / (get_width() + 1);
+ uint min= find_bucket(min_mp_pos, TRUE);
+ uint max= find_bucket(max_mp_pos, FALSE);
+ sel= bucket_sel * (max - min + 1);
+
+ set_if_bigger(sel, avg_sel);
+ return sel;
+}
+
/*
Check whether the table is one of the persistent statistical tables.
*/