diff options
author | Vicențiu Ciorbaru <vicentiu@mariadb.org> | 2019-02-15 01:23:00 +0200 |
---|---|---|
committer | Vicențiu Ciorbaru <vicentiu@mariadb.org> | 2019-02-19 12:01:21 +0200 |
commit | f0773b7842fcfd2032b630b4cfc7404a29d12a8f (patch) | |
tree | 3b00628835a73575036e3488e2613d39bc8544e0 /mysql-test/main/statistics.test | |
parent | 47f15ea73c49e90b16a4a4adf5414f51bdbf97a4 (diff) | |
download | mariadb-git-f0773b7842fcfd2032b630b4cfc7404a29d12a8f.tar.gz |
Introduce analyze_sample_percentage variable
The variable controls the amount of sampling analyze table performs.
If ANALYZE table with histogram collection is too slow, one can reduce the
time taken by setting analyze_sample_percentage to a lower value of the
total number of rows.
Setting it to 0 will use a formula to compute how many rows to sample:
The number of rows collected is capped to a minimum of 50000 and
increases logarithmically with a coffecient of 4096. The coffecient is
chosen so that we expect an error of less than 3% in our estimations
according to the paper:
"Random Sampling for Histogram Construction: How much is enough?”
– Surajit Chaudhuri, Rajeev Motwani, Vivek Narasayya, ACM SIGMOD, 1998.
The drawback of sampling is that avg_frequency number is computed
imprecisely and will yeild a smaller number than the real one.
Diffstat (limited to 'mysql-test/main/statistics.test')
-rw-r--r-- | mysql-test/main/statistics.test | 88 |
1 files changed, 88 insertions, 0 deletions
diff --git a/mysql-test/main/statistics.test b/mysql-test/main/statistics.test index 78439e3e68a..84bb7c10059 100644 --- a/mysql-test/main/statistics.test +++ b/mysql-test/main/statistics.test @@ -901,3 +901,91 @@ DROP TABLE t1; --echo # set histogram_size=@save_hist_size, histogram_type=@save_hist_type; + + +--echo # +--echo # Start of 10.4 tests +--echo # + +--echo # +--echo # Test analyze_sample_percentage system variable. +--echo # +set @save_use_stat_tables=@@use_stat_tables; +set @save_analyze_sample_percentage=@@analyze_sample_percentage; + +set session rand_seed1=42; +set session rand_seed2=62; + +set use_stat_tables=PREFERABLY; +set histogram_size=10; + +CREATE TABLE t1 (id int); +INSERT INTO t1 (id) VALUES (1), (1), (1), (1), (1), (1), (1); +INSERT INTO t1 (id) SELECT id FROM t1; +INSERT INTO t1 SELECT id+1 FROM t1; +INSERT INTO t1 SELECT id+2 FROM t1; +INSERT INTO t1 SELECT id+4 FROM t1; +INSERT INTO t1 SELECT id+8 FROM t1; +INSERT INTO t1 SELECT id+16 FROM t1; +INSERT INTO t1 SELECT id+32 FROM t1; +INSERT INTO t1 SELECT id+64 FROM t1; +INSERT INTO t1 SELECT id+128 FROM t1; +INSERT INTO t1 SELECT id+256 FROM t1; +INSERT INTO t1 SELECT id+512 FROM t1; +INSERT INTO t1 SELECT id+1024 FROM t1; +INSERT INTO t1 SELECT id+2048 FROM t1; +INSERT INTO t1 SELECT id+4096 FROM t1; +INSERT INTO t1 SELECT id+9192 FROM t1; + +--echo # +--echo # This query will should show a full table scan analysis. +--echo # +ANALYZE TABLE t1; +select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency, + DECODE_HISTOGRAM(hist_type, histogram) +from mysql.column_stats; + +set analyze_sample_percentage=0.1; + +--echo # +--echo # This query will show an innacurate avg_frequency value. +--echo # +ANALYZE TABLE t1; +select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency, + DECODE_HISTOGRAM(hist_type, histogram) +from mysql.column_stats; + +--echo # +--echo # This query will show a better avg_frequency value. +--echo # +set analyze_sample_percentage=25; +ANALYZE TABLE t1; +select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency, + DECODE_HISTOGRAM(hist_type, histogram) +from mysql.column_stats; + + +set analyze_sample_percentage=0; +--echo # +--echo # Test self adjusting sampling level. +--echo # +ANALYZE TABLE t1; +select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency, + DECODE_HISTOGRAM(hist_type, histogram) +from mysql.column_stats; +-- echo # +-- echo # Test record estimation is working properly. +-- echo # +select count(*) from t1; +explain select * from t1; + +set analyze_sample_percentage=100; +ANALYZE TABLE t1; +select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, avg_frequency, + DECODE_HISTOGRAM(hist_type, histogram) +from mysql.column_stats; +explain select * from t1; + +set use_stat_tables=@save_use_stat_tables; + +drop table t1; |