| author | Vicențiu Ciorbaru <vicentiu@mariadb.org> | 2019-02-10 01:43:15 +0200 |
|---|---|---|
| committer | Vicențiu Ciorbaru <vicentiu@mariadb.org> | 2019-02-10 01:43:15 +0200 |
| commit | 25947c60d54b969359521b9ca55da3054c600745 (patch) | |
| tree | d28924673e6f4ddab059251395d60b9f189ed7c2 | |
| parent | 30a18eed822c207880a52b31c566525dfef8fb55 (diff) | |
| download | mariadb-git-bb-10.4-vicentiu-histograms.tar.gz | |
Default Bernoulli Sampling implementation (branches: bb-10.4-vicentiu-histograms, 10.4-vicentiu-histograms)
-rw-r--r-- | mysql-test/main/stats.test | 10
-rw-r--r-- | sql/handler.h | 44
-rw-r--r-- | sql/sql_statistics.cc | 34
-rw-r--r-- | storage/myisam/ha_myisam.cc | 2
-rw-r--r-- | storage/myisam/ha_myisam.h | 3
-rw-r--r-- | storage/myisam/mi_range.c | 9
6 files changed, 73 insertions, 29 deletions
```diff
diff --git a/mysql-test/main/stats.test b/mysql-test/main/stats.test
index 0463fb3fe77..f7bfe6cf84c 100644
--- a/mysql-test/main/stats.test
+++ b/mysql-test/main/stats.test
@@ -10,12 +10,16 @@ insert into ten (a) values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
 create table hundred (a int);
-insert into hundred select t.a from ten t, ten t2, ten t3;
+insert into hundred select t.a from ten t, ten t2, ten t3, ten t4, ten t5, ten t6;
 select (count(*)) from hundred;
-create table t2 (a int, b int, PRIMARY KEY (a, b));
+create table t2 (a int, b int, c int, d int, PRIMARY KEY (a, b));
-insert into t2 (a, b) select row_number() over (), row_number() over () * 10 from hundred;
+insert into t2 (a, b, c, d) select row_number() over (), row_number() over () * 10,
+       row_number() over () * 100, row_number() over () * 1000 from hundred;
 ANALYZE TABLE t2 persistent for ALL;
+
+select * from mysql.table_stats;
+SELECT * from mysql.column_stats;
diff --git a/sql/handler.h b/sql/handler.h
index 8f9ddc01174..d40e986fd9d 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1906,6 +1906,11 @@ enum enum_stats_auto_recalc { HA_STATS_AUTO_RECALC_DEFAULT= 0,
                               HA_STATS_AUTO_RECALC_ON,
                               HA_STATS_AUTO_RECALC_OFF };
 
+enum sample_mode {
+  HA_SAMPLE_BERNOULLI= 0,
+  HA_SAMPLE_SYSTEM,
+};
+
 /**
   A helper struct for schema DDL statements:
     CREATE SCHEMA [IF NOT EXISTS] name [ schema_specification... ]
@@ -2940,9 +2945,11 @@ public:
   /** Length of ref (1-8 or the clustered key length) */
   uint ref_length;
   FT_INFO *ft_handler;
-  enum init_stat { NONE=0, INDEX, RND, RANDOM };
+  enum init_stat { NONE=0, INDEX, RND, SAMPLE };
   init_stat inited, pre_inited;
 
+  double sample_fraction= 0;
+  enum sample_mode sample_mode;
   const COND *pushed_cond;
   /**
     next_insert_id is the next value which should be inserted into the
@@ -3105,21 +3112,25 @@ public:
   virtual int prepare_range_scan(const key_range *start_key,
                                  const key_range *end_key)
   { return 0; }
-  virtual int ha_random_sample_init(THD *thd, ha_rows estimate_rows_read)
+  int ha_random_sample_init(THD *thd, enum sample_mode mode, double fraction)
     __attribute__((warn_unused_result))
   {
     DBUG_ENTER("ha_random_sample_init");
-    inited= RANDOM;
-    DBUG_RETURN(random_sample_init(thd, estimate_rows_read));
+    DBUG_ASSERT(inited==NONE);
+    int result;
+    sample_mode= mode;
+    sample_fraction= fraction;
+    inited= (result= random_sample_init(mode, fraction)) ? NONE : SAMPLE;
+    DBUG_RETURN(result);
   }
-  virtual int ha_random_sample(uchar *buf)
+  int ha_random_sample(uchar *buf)
     __attribute__((warn_unused_result))
   {
     DBUG_ENTER("ha_random_sample");
-    DBUG_ASSERT(inited == RANDOM);
+    DBUG_ASSERT(inited == SAMPLE);
     DBUG_RETURN(random_sample(buf));
   }
-  virtual int ha_random_sample_end() __attribute__((warn_unused_result))
+  int ha_random_sample_end()
   {
     DBUG_ENTER("ha_random_sample_end");
     inited= NONE;
@@ -4439,12 +4450,25 @@ private:
   /* Note: ha_index_read_idx_map() may bypass index_init() */
   virtual int index_init(uint idx, bool sorted) { return 0; }
   virtual int index_end() { return 0; }
-  virtual int random_sample_init(MYSQL_THD thd, ha_rows estimate_rows_read) { return 0; } ;
+  virtual int random_sample_init(enum sample_mode mode, double fraction)
+  {
+    return rnd_init(TRUE);
+  }
   virtual int random_sample(uchar *buf)
   {
-    return HA_ERR_WRONG_COMMAND;
+    int rc;
+    THD *thd= ha_thd();
+    do
+    {
+      rc= rnd_next(buf);
+
+      if (rc == HA_ERR_RECORD_DELETED)
+        continue;
+
+    } while (rc == HA_ERR_RECORD_DELETED || thd_rnd(thd) > sample_fraction);
+    return rc;
   }
-  virtual int random_sample_end() { return 0; };
+  virtual int random_sample_end() { return rnd_end(); }
   /**
     rnd_init() can be called two times without rnd_end() in between (it
     only makes sense if scan=1).
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index daebc5d0b38..b8b99015745 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -2727,12 +2727,15 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
   Field *table_field;
   ha_rows rows= 0;
   handler *file=table->file;
+  double sample_fraction;
   DBUG_ENTER("collect_statistics_for_table");
 
   table->collected_stats->cardinality_is_null= TRUE;
   table->collected_stats->cardinality= 0;
 
+  table->file->info(HA_STATUS_VARIABLE);
+
   for (field_ptr= table->field; *field_ptr; field_ptr++)
   {
     table_field= *field_ptr;
@@ -2743,19 +2746,27 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
 
   restore_record(table, s->default_values);
 
-  rc= file->ha_random_sample_init(thd, 100);
-  rc= file->ha_random_sample(table->record[0]);
-  table_field->collected_stats->add(0);
-  rc= file->ha_random_sample_end();
+  if (file->records() < 30000)
+  {
+    sample_fraction= 1;
+  }
+  else
+  {
+    sample_fraction= std::fmin(
+        (30000 + 4096 * log(200 * file->records())) /
+        (file->records() + 1), 1);
+  }
+
+
+  /* Fetch samples from the table to collect statistics on table's columns */
 
-  /* Perform a full table scan to collect statistics on 'table's columns */
-  /*
-  if (!(rc= file->ha_rnd_init(TRUE)))
-  {
+  if (!(rc= file->ha_random_sample_init(thd, HA_SAMPLE_BERNOULLI,
+                                        sample_fraction)))
+  {
     DEBUG_SYNC(table->in_use, "statistics_collection_start");
 
-    while ((rc= file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE)
+    while ((rc= file->ha_random_sample(table->record[0])) != HA_ERR_END_OF_FILE)
     {
       if (thd->killed)
         break;
@@ -2775,10 +2786,9 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
         break;
       rows++;
     }
-    file->ha_rnd_end();
+    file->ha_random_sample_end();
   }
   rc= (rc == HA_ERR_END_OF_FILE && !thd->killed) ? 0 : 1;
-  */
   /*
     Calculate values for all statistical characteristics on columns and
     and for each field f of 'table' save them in the write_stat structure
@@ -2787,7 +2797,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
   if (!rc)
   {
     table->collected_stats->cardinality_is_null= FALSE;
-    table->collected_stats->cardinality= rows;
+    table->collected_stats->cardinality= rows / sample_fraction;
   }
 
   bitmap_clear_all(table->write_set);
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index 45abd2ff404..d032a267550 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -2723,6 +2723,7 @@ my_bool ha_myisam::register_query_cache_table(THD *thd, const char *table_name,
   DBUG_RETURN(TRUE);
 }
 
+/*
 int ha_myisam::random_sample_init(MYSQL_THD thd, ha_rows estimate_rows_read)
 {
   DBUG_ENTER("ha_myisam::random_sample_init");
@@ -2740,4 +2741,5 @@ int ha_myisam::random_sample_end()
   DBUG_ENTER("ha_myisam::random_sample_end");
   DBUG_RETURN(mi_random_sample_end(file));
 }
+*/
 #endif
diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h
index 08927435c14..804963f5efc 100644
--- a/storage/myisam/ha_myisam.h
+++ b/storage/myisam/ha_myisam.h
@@ -97,9 +97,6 @@ class ha_myisam: public handler
                                table->record[0]);
   }
   int ft_read(uchar *buf);
-  int random_sample_init(MYSQL_THD thd, ha_rows estimate_rows_read) override;
-  int random_sample(uchar *buf) override;
-  int random_sample_end() override;
   int rnd_init(bool scan);
   int rnd_next(uchar *buf);
   int rnd_pos(uchar * buf, uchar *pos);
diff --git a/storage/myisam/mi_range.c b/storage/myisam/mi_range.c
index d33f3614507..47d320c944e 100644
--- a/storage/myisam/mi_range.c
+++ b/storage/myisam/mi_range.c
@@ -288,12 +288,18 @@ static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
 
 static int _mi_read_sample_static_record(MI_INFO *info, uchar *buf)
 {
+  my_off_t record_offset;
   DBUG_ENTER("_mi_read_sample_static_record");
   DBUG_ASSERT(info->s->read_rnd == _mi_read_rnd_static_record);
 
   if (fast_mi_readinfo(info))
     DBUG_RETURN(-1);
 
+  record_offset= info->s->pack.header_length +
+    ((ha_rows) (my_rnd_ssl(&info->sampling_state.rand) *
+                info->state->records)) * info->s->base.reclength;
+
+  info->s->read_rnd(info, buf, record_offset, 1);
 
   fast_mi_writeinfo(info);
   DBUG_RETURN(0);
@@ -314,11 +320,12 @@ static int _mi_read_sample_bernoulli(MI_INFO *info, uchar *buf)
   select_probability= (double) info->sampling_state.estimate_rows_read /
                       info->state->records;
 
+
   do
   {
     if ((res= mi_scan(info, buf)))
       break;
-  } while (my_rnd_ssl(&info->sampling_state.rand) < select_probability);
+  } while (my_rnd_ssl(&info->sampling_state.rand) > select_probability);
 
   fast_mi_writeinfo(info);
   DBUG_RETURN(res);
```
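Note: the sampling policy this commit wires into `collect_statistics_for_table()` has two parts: a sampling fraction derived from the estimated row count (a full scan below 30000 rows, otherwise roughly `(30000 + 4096 * log(200 * N)) / (N + 1)`, capped at 1), and a per-row Bernoulli accept/reject test applied during a sequential scan, with the counted rows scaled back up by the fraction when the cardinality is stored. The sketch below is a standalone simulation of that arithmetic, not MariaDB code; `sample_fraction_for()` is a made-up helper name and `std::mt19937_64` stands in for `thd_rnd()`.

```cpp
// Standalone illustration of the Bernoulli sampling policy from the patch.
// Not MariaDB code: the RNG and the loop stand in for thd_rnd() and the
// handler's random_sample() scan.
#include <cmath>
#include <cstdint>
#include <iostream>
#include <random>

// Same shape as the formula in collect_statistics_for_table(): scan
// everything below 30000 rows, otherwise sample a shrinking fraction.
static double sample_fraction_for(double est_rows)
{
  if (est_rows < 30000)
    return 1.0;
  return std::fmin((30000 + 4096 * std::log(200 * est_rows)) / (est_rows + 1),
                   1.0);
}

int main()
{
  const uint64_t table_rows= 1000000;            // pretend table size
  const double fraction= sample_fraction_for((double) table_rows);

  std::mt19937_64 rng(42);
  std::uniform_real_distribution<double> unif(0.0, 1.0);

  uint64_t sampled= 0;
  for (uint64_t i= 0; i < table_rows; i++)
  {
    // Bernoulli test per row: keep the row iff the random draw falls inside
    // the sampling fraction (the handler default rejects rows while
    // thd_rnd(thd) > sample_fraction).
    if (unif(rng) <= fraction)
      sampled++;
  }

  // Cardinality is extrapolated the same way the patch does:
  // rows / sample_fraction.
  std::cout << "fraction=" << fraction
            << " sampled=" << sampled
            << " estimated cardinality=" << (double) sampled / fraction
            << "\n";
  return 0;
}
```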
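Note: for fixed-length MyISAM rows, `_mi_read_sample_static_record()` in the `mi_range.c` hunk picks a uniformly random record by slot number and converts it to a byte offset (`header_length + slot * reclength`). A hypothetical, self-contained version of that offset arithmetic is sketched below; `StaticFileLayout` and `random_record_offset()` are illustrative names, not MyISAM structures.

```cpp
// Hypothetical sketch of the random-offset pick for fixed-length records,
// mirroring _mi_read_sample_static_record() in the diff. The struct and the
// RNG are illustrative stand-ins, not MyISAM definitions.
#include <cstdint>
#include <cstdio>
#include <random>

struct StaticFileLayout
{
  uint64_t header_length;   // corresponds to info->s->pack.header_length
  uint64_t records;         // corresponds to info->state->records
  uint64_t reclength;       // corresponds to info->s->base.reclength
};

// Pick a random record slot and translate it into a byte offset, the same
// arithmetic as record_offset= header_length + slot * reclength.
static uint64_t random_record_offset(const StaticFileLayout &s, double r)
{
  uint64_t slot= (uint64_t) (r * (double) s.records);  // r in [0, 1)
  return s.header_length + slot * s.reclength;
}

int main()
{
  StaticFileLayout layout= { 1024, 500000, 96 };
  std::mt19937_64 rng(7);
  std::uniform_real_distribution<double> unif(0.0, 1.0);

  for (int i= 0; i < 3; i++)
    std::printf("offset=%llu\n",
                (unsigned long long) random_record_offset(layout, unif(rng)));
  return 0;
}
```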