summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Vicențiu Ciorbaru <vicentiu@mariadb.org>  2019-02-10 01:43:15 +0200
committer: Vicențiu Ciorbaru <vicentiu@mariadb.org>  2019-02-10 01:43:15 +0200
commit: 25947c60d54b969359521b9ca55da3054c600745 (patch)
tree: d28924673e6f4ddab059251395d60b9f189ed7c2
parent: 30a18eed822c207880a52b31c566525dfef8fb55 (diff)
download: mariadb-git-bb-10.4-vicentiu-histograms.tar.gz
Default Bernoulli Sampling implementation (refs: bb-10.4-vicentiu-histograms, 10.4-vicentiu-histograms)
-rw-r--r--  mysql-test/main/stats.test   | 10
-rw-r--r--  sql/handler.h                | 44
-rw-r--r--  sql/sql_statistics.cc        | 34
-rw-r--r--  storage/myisam/ha_myisam.cc  |  2
-rw-r--r--  storage/myisam/ha_myisam.h   |  3
-rw-r--r--  storage/myisam/mi_range.c    |  9
6 files changed, 73 insertions(+), 29 deletions(-)
diff --git a/mysql-test/main/stats.test b/mysql-test/main/stats.test
index 0463fb3fe77..f7bfe6cf84c 100644
--- a/mysql-test/main/stats.test
+++ b/mysql-test/main/stats.test
@@ -10,12 +10,16 @@ insert into ten (a) values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
create table hundred (a int);
-insert into hundred select t.a from ten t, ten t2, ten t3;
+insert into hundred select t.a from ten t, ten t2, ten t3, ten t4, ten t5, ten t6;
select (count(*)) from hundred;
-create table t2 (a int, b int, PRIMARY KEY (a, b));
+create table t2 (a int, b int, c int, d int, PRIMARY KEY (a, b));
-insert into t2 (a, b) select row_number() over (), row_number() over () * 10 from hundred;
+insert into t2 (a, b, c, d) select row_number() over (), row_number() over () * 10,
+ row_number() over () * 100, row_number() over () * 1000 from hundred;
ANALYZE TABLE t2 persistent for ALL;
+
+select * from mysql.table_stats;
+SELECT * from mysql.column_stats;
diff --git a/sql/handler.h b/sql/handler.h
index 8f9ddc01174..d40e986fd9d 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1906,6 +1906,11 @@ enum enum_stats_auto_recalc { HA_STATS_AUTO_RECALC_DEFAULT= 0,
HA_STATS_AUTO_RECALC_ON,
HA_STATS_AUTO_RECALC_OFF };
+enum sample_mode {
+ HA_SAMPLE_BERNOULLI= 0,
+ HA_SAMPLE_SYSTEM,
+};
+
/**
A helper struct for schema DDL statements:
CREATE SCHEMA [IF NOT EXISTS] name [ schema_specification... ]
@@ -2940,9 +2945,11 @@ public:
/** Length of ref (1-8 or the clustered key length) */
uint ref_length;
FT_INFO *ft_handler;
- enum init_stat { NONE=0, INDEX, RND, RANDOM };
+ enum init_stat { NONE=0, INDEX, RND, SAMPLE };
init_stat inited, pre_inited;
+ double sample_fraction= 0;
+ enum sample_mode sample_mode;
const COND *pushed_cond;
/**
next_insert_id is the next value which should be inserted into the
@@ -3105,21 +3112,25 @@ public:
virtual int prepare_range_scan(const key_range *start_key, const key_range *end_key)
{ return 0; }
- virtual int ha_random_sample_init(THD *thd, ha_rows estimate_rows_read)
+ int ha_random_sample_init(THD *thd, enum sample_mode mode, double fraction)
__attribute__((warn_unused_result))
{
DBUG_ENTER("ha_random_sample_init");
- inited= RANDOM;
- DBUG_RETURN(random_sample_init(thd, estimate_rows_read));
+ DBUG_ASSERT(inited==NONE);
+ int result;
+ sample_mode= mode;
+ sample_fraction= fraction;
+ inited= (result= random_sample_init(mode, fraction)) ? NONE : SAMPLE;
+ DBUG_RETURN(result);
}
- virtual int ha_random_sample(uchar *buf)
+ int ha_random_sample(uchar *buf)
__attribute__((warn_unused_result))
{
DBUG_ENTER("ha_random_sample");
- DBUG_ASSERT(inited == RANDOM);
+ DBUG_ASSERT(inited == SAMPLE);
DBUG_RETURN(random_sample(buf));
}
- virtual int ha_random_sample_end() __attribute__((warn_unused_result))
+ int ha_random_sample_end()
{
DBUG_ENTER("ha_random_sample_end");
inited= NONE;
@@ -4439,12 +4450,25 @@ private:
/* Note: ha_index_read_idx_map() may bypass index_init() */
virtual int index_init(uint idx, bool sorted) { return 0; }
virtual int index_end() { return 0; }
- virtual int random_sample_init(MYSQL_THD thd, ha_rows estimate_rows_read) { return 0; } ;
+ virtual int random_sample_init(enum sample_mode mode, double fraction)
+ {
+ return rnd_init(TRUE);
+ }
virtual int random_sample(uchar *buf)
{
- return HA_ERR_WRONG_COMMAND;
+ int rc;
+ THD *thd= ha_thd();
+ do
+ {
+ rc= rnd_next(buf);
+
+ if (rc == HA_ERR_RECORD_DELETED)
+ continue;
+
+ } while (rc == HA_ERR_RECORD_DELETED || thd_rnd(thd) > sample_fraction);
+ return rc;
}
- virtual int random_sample_end() { return 0; };
+ virtual int random_sample_end() { return rnd_end(); }
/**
rnd_init() can be called two times without rnd_end() in between
(it only makes sense if scan=1).
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index daebc5d0b38..b8b99015745 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -2727,12 +2727,15 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
Field *table_field;
ha_rows rows= 0;
handler *file=table->file;
+ double sample_fraction;
DBUG_ENTER("collect_statistics_for_table");
table->collected_stats->cardinality_is_null= TRUE;
table->collected_stats->cardinality= 0;
+ table->file->info(HA_STATUS_VARIABLE);
+
for (field_ptr= table->field; *field_ptr; field_ptr++)
{
table_field= *field_ptr;
@@ -2743,19 +2746,27 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
restore_record(table, s->default_values);
- rc= file->ha_random_sample_init(thd, 100);
- rc= file->ha_random_sample(table->record[0]);
- table_field->collected_stats->add(0);
- rc= file->ha_random_sample_end();
+ if (file->records() < 30000)
+ {
+ sample_fraction= 1;
+ }
+ else
+ {
+ sample_fraction= std::fmin(
+ (30000 + 4096 * log(200 * file->records())) /
+ (file->records() + 1), 1);
+ }
+
+
+ /* Fetch samples from the table to collect statistics on table's columns */
- /* Perform a full table scan to collect statistics on 'table's columns */
- /*
- if (!(rc= file->ha_rnd_init(TRUE)))
- {
+ if (!(rc= file->ha_random_sample_init(thd, HA_SAMPLE_BERNOULLI,
+ sample_fraction)))
+ {
DEBUG_SYNC(table->in_use, "statistics_collection_start");
- while ((rc= file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE)
+ while ((rc= file->ha_random_sample(table->record[0])) != HA_ERR_END_OF_FILE)
{
if (thd->killed)
break;
@@ -2775,10 +2786,9 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
break;
rows++;
}
- file->ha_rnd_end();
+ file->ha_random_sample_end();
}
rc= (rc == HA_ERR_END_OF_FILE && !thd->killed) ? 0 : 1;
- */
/*
Calculate values for all statistical characteristics on columns and
and for each field f of 'table' save them in the write_stat structure
@@ -2787,7 +2797,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
if (!rc)
{
table->collected_stats->cardinality_is_null= FALSE;
- table->collected_stats->cardinality= rows;
+ table->collected_stats->cardinality= rows / sample_fraction;
}
bitmap_clear_all(table->write_set);
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index 45abd2ff404..d032a267550 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -2723,6 +2723,7 @@ my_bool ha_myisam::register_query_cache_table(THD *thd, const char *table_name,
DBUG_RETURN(TRUE);
}
+/*
int ha_myisam::random_sample_init(MYSQL_THD thd, ha_rows estimate_rows_read)
{
DBUG_ENTER("ha_myisam::random_sample_init");
@@ -2740,4 +2741,5 @@ int ha_myisam::random_sample_end()
DBUG_ENTER("ha_myisam::random_sample_end");
DBUG_RETURN(mi_random_sample_end(file));
}
+*/
#endif
diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h
index 08927435c14..804963f5efc 100644
--- a/storage/myisam/ha_myisam.h
+++ b/storage/myisam/ha_myisam.h
@@ -97,9 +97,6 @@ class ha_myisam: public handler
table->record[0]);
}
int ft_read(uchar *buf);
- int random_sample_init(MYSQL_THD thd, ha_rows estimate_rows_read) override;
- int random_sample(uchar *buf) override;
- int random_sample_end() override;
int rnd_init(bool scan);
int rnd_next(uchar *buf);
int rnd_pos(uchar * buf, uchar *pos);
diff --git a/storage/myisam/mi_range.c b/storage/myisam/mi_range.c
index d33f3614507..47d320c944e 100644
--- a/storage/myisam/mi_range.c
+++ b/storage/myisam/mi_range.c
@@ -288,12 +288,18 @@ static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
static int _mi_read_sample_static_record(MI_INFO *info, uchar *buf)
{
+ my_off_t record_offset;
DBUG_ENTER("_mi_read_sample_static_record");
DBUG_ASSERT(info->s->read_rnd == _mi_read_rnd_static_record);
if (fast_mi_readinfo(info))
DBUG_RETURN(-1);
+ record_offset= info->s->pack.header_length +
+ ((ha_rows) (my_rnd_ssl(&info->sampling_state.rand) *
+ info->state->records)) * info->s->base.reclength;
+
+ info->s->read_rnd(info, buf, record_offset, 1);
fast_mi_writeinfo(info);
DBUG_RETURN(0);
@@ -314,11 +320,12 @@ static int _mi_read_sample_bernoulli(MI_INFO *info, uchar *buf)
select_probability= (double) info->sampling_state.estimate_rows_read /
info->state->records;
+
do
{
if ((res= mi_scan(info, buf)))
break;
- } while (my_rnd_ssl(&info->sampling_state.rand) < select_probability);
+ } while (my_rnd_ssl(&info->sampling_state.rand) > select_probability);
fast_mi_writeinfo(info);
DBUG_RETURN(res);