summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Petrunia <psergey@askmonty.org>2021-10-22 19:43:19 +0300
committerSergei Petrunia <psergey@askmonty.org>2022-01-19 18:10:11 +0300
commit05877df472fa95cab763dfb1299e81681a12da73 (patch)
tree00753389e2ddeb3931cb3fdb605b2c92064373b1
parentf3f78bed8530e1e858d5ed87054f2ac672760824 (diff)
downloadmariadb-git-05877df472fa95cab763dfb1299e81681a12da73.tar.gz
MDEV-26849: JSON Histograms: point selectivity estimates are off
.. for non-existent values. Handle this special case.
-rw-r--r--mysql-test/main/statistics.result1
-rw-r--r--mysql-test/main/statistics.test2
-rw-r--r--mysql-test/main/statistics_json.result102
-rw-r--r--mysql-test/main/statistics_json.test22
-rw-r--r--sql/opt_histogram_json.cc10
-rw-r--r--sql/opt_histogram_json.h3
-rw-r--r--sql/sql_statistics.cc6
-rw-r--r--sql/sql_statistics.h6
8 files changed, 142 insertions, 10 deletions
diff --git a/mysql-test/main/statistics.result b/mysql-test/main/statistics.result
index 7556900f4a2..3299c8904a9 100644
--- a/mysql-test/main/statistics.result
+++ b/mysql-test/main/statistics.result
@@ -1885,6 +1885,7 @@ t1 id 1 17384 0.0000 4.0000 14.0000 0.15705,0.15711,0.21463,0.15705,0.15711,0.15
explain select * from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
+drop table t0;
drop table t1;
set analyze_sample_percentage=@save_analyze_sample_percentage;
set histogram_size=@save_histogram_size;
diff --git a/mysql-test/main/statistics.test b/mysql-test/main/statistics.test
index cbb923bee66..e39a95300af 100644
--- a/mysql-test/main/statistics.test
+++ b/mysql-test/main/statistics.test
@@ -1019,7 +1019,7 @@ select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, a
from mysql.column_stats;
explain select * from t1;
-
+drop table t0;
drop table t1;
set analyze_sample_percentage=@save_analyze_sample_percentage;
set histogram_size=@save_histogram_size;
diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result
index 3cd9bfb181f..1e7e0d88e72 100644
--- a/mysql-test/main/statistics_json.result
+++ b/mysql-test/main/statistics_json.result
@@ -4148,6 +4148,7 @@ t1 id 1 17384 0.0000 4.0000 14.0000 {
explain select * from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
+drop table t0;
drop table t1;
set analyze_sample_percentage=@save_analyze_sample_percentage;
set histogram_size=@save_histogram_size;
@@ -7530,3 +7531,104 @@ select c from t1 where c > '1';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 16 16.00 75.00 75.00 Using where
drop table t1;
+#
+# MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values
+#
+#
+create table t0(a int);
+insert into t0 (a) values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select 100*A.a from t0 A, t0 B, t0 C;
+select a, count(*) from t1 group by a order by a;
+a count(*)
+0 100
+100 100
+200 100
+300 100
+400 100
+500 100
+600 100
+700 100
+800 100
+900 100
+set histogram_type=json_hb, histogram_size=default;
+analyze table t1 persistent for all;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+select * from mysql.column_stats where table_name='t1';
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+test t1 a 0 900 0.0000 4.0000 100.0000 10 JSON_HB {
+ "histogram_hb_v2": [
+ {
+ "start": "0",
+ "size": 0.1,
+ "ndv": 1
+ },
+ {
+ "start": "100",
+ "size": 0.1,
+ "ndv": 1
+ },
+ {
+ "start": "200",
+ "size": 0.1,
+ "ndv": 1
+ },
+ {
+ "start": "300",
+ "size": 0.1,
+ "ndv": 1
+ },
+ {
+ "start": "400",
+ "size": 0.1,
+ "ndv": 1
+ },
+ {
+ "start": "500",
+ "size": 0.1,
+ "ndv": 1
+ },
+ {
+ "start": "600",
+ "size": 0.1,
+ "ndv": 1
+ },
+ {
+ "start": "700",
+ "size": 0.1,
+ "ndv": 1
+ },
+ {
+ "start": "800",
+ "size": 0.1,
+ "ndv": 1
+ },
+ {
+ "start": "900",
+ "end": "900",
+ "size": 0.1,
+ "ndv": 1
+ }
+ ]
+}
+analyze select * from t1 where a=0;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
+analyze select * from t1 where a=50;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
+analyze select * from t1 where a=70;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
+analyze select * from t1 where a=100;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
+analyze select * from t1 where a=150;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
+analyze select * from t1 where a=200;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
+drop table t0,t1;
diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test
index 352150e8497..e968c910f4b 100644
--- a/mysql-test/main/statistics_json.test
+++ b/mysql-test/main/statistics_json.test
@@ -295,3 +295,25 @@ select c from t1 where c > '1';
drop table t1;
+--echo #
+--echo # MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values
+--echo #
+--echo #
+
+create table t0(a int);
+insert into t0 (a) values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select 100*A.a from t0 A, t0 B, t0 C;
+select a, count(*) from t1 group by a order by a;
+set histogram_type=json_hb, histogram_size=default;
+analyze table t1 persistent for all;
+select * from mysql.column_stats where table_name='t1';
+analyze select * from t1 where a=0;
+analyze select * from t1 where a=50;
+analyze select * from t1 where a=70;
+analyze select * from t1 where a=100;
+analyze select * from t1 where a=150;
+analyze select * from t1 where a=200;
+
+drop table t0,t1;
+
diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc
index 1793aa7df0d..89af9fe3c3f 100644
--- a/sql/opt_histogram_json.cc
+++ b/sql/opt_histogram_json.cc
@@ -616,7 +616,7 @@ double position_in_interval(Field *field, const uchar *key, uint key_len,
double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
- double avg_sel)
+ double avg_sel, double total_rows)
{
const uchar *key = endpoint->key;
if (field->real_maybe_null())
@@ -631,9 +631,11 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
if (buckets[idx].ndv == 1 && !equal)
{
- // The bucket has a single value and it doesn't match! Use the global
- // average.
- sel= avg_sel;
+ /*
+ The bucket has a single value and it doesn't match! Return a very
+ small value.
+ */
+ sel= 1.0 / total_rows;
}
else
{
diff --git a/sql/opt_histogram_json.h b/sql/opt_histogram_json.h
index df7547b31ee..6771942e94e 100644
--- a/sql/opt_histogram_json.h
+++ b/sql/opt_histogram_json.h
@@ -109,7 +109,8 @@ public:
ulonglong size) override;
double point_selectivity(Field *field, key_range *endpoint,
- double avg_selection) override;
+ double avg_selection,
+ double total_rows) override;
double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp) override;
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index c62230a9af1..c87330316f4 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -3882,7 +3882,8 @@ double get_column_range_cardinality(Field *field,
{
res= col_non_nulls *
hist->point_selectivity(field, min_endp,
- avg_frequency / col_non_nulls);
+ avg_frequency / col_non_nulls,
+ tab_records);
}
}
else if (avg_frequency == 0.0)
@@ -3973,7 +3974,8 @@ double get_column_range_cardinality(Field *field,
*/
double Histogram_binary::point_selectivity(Field *field, key_range *endpoint,
- double avg_sel)
+ double avg_sel,
+ double total_records)
{
double sel;
Column_statistics *col_stats= field->read_stats;
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 20a9bb9dd83..c5b1aa54399 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -186,7 +186,8 @@ public:
virtual double point_selectivity(Field *field, key_range *endpoint,
- double avg_selection)=0;
+ double avg_selectivity,
+ double total_rows)=0;
virtual double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp)=0;
@@ -355,7 +356,8 @@ public:
Estimate selectivity of "col=const" using a histogram
*/
double point_selectivity(Field *field, key_range *endpoint,
- double avg_sel) override;
+ double avg_sel,
+ double total_rows) override;
};