4 files changed, 151 insertions, 8 deletions
diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result
index b5a48d341ae..9836c832ca7 100644
--- a/mysql-test/main/selectivity.result
+++ b/mysql-test/main/selectivity.result
@@ -1946,4 +1946,70 @@ set histogram_size=@save_histogram_size;
 set use_stat_tables= @save_use_stat_tables;
 DROP TABLE t1;
 # End of 10.2 tests
+#
+# MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
+#
+create table t0(a int);
+insert into t0 select 1 from seq_1_to_78;
+create table t1(a int);
+insert into t1 select 1 from seq_1_to_26;
+create table t10 (a int);
+insert into t10 select 0 from t0, seq_1_to_4;
+insert into t10 select 8693 from t1;
+insert into t10 select 8694 from t1;
+insert into t10 select 8695 from t1;
+insert into t10 select 34783 from t1;
+insert into t10 select 34784 from t1;
+insert into t10 select 34785 from t1;
+insert into t10 select 34785 from t0, seq_1_to_8;
+insert into t10 select 65214 from t1;
+insert into t10 select 65215 from t1;
+insert into t10 select 65216 from t1;
+insert into t10 select 65216 from t0, seq_1_to_52;
+insert into t10 select 65217 from t1;
+insert into t10 select 65218 from t1;
+insert into t10 select 65219 from t1;
+insert into t10 select 65219 from t0;
+insert into t10 select 73913 from t1;
+insert into t10 select 73914 from t1;
+insert into t10 select 73915 from t1;
+insert into t10 select 73915 from t0, seq_1_to_40;
+insert into t10 select 78257 from t1;
+insert into t10 select 78258 from t1;
+insert into t10 select 78259 from t1;
+insert into t10 select 91300 from t1;
+insert into t10 select 91301 from t1;
+insert into t10 select 91302 from t1;
+insert into t10 select 91302 from t0, seq_1_to_6;
+insert into t10 select 91303 from t1;
+insert into t10 select 91304 from t1;
+insert into t10 select 91305 from t1;
+insert into t10 select 91305 from t0, seq_1_to_8;
+insert into t10 select  99998 from t1;
+insert into t10 select  99999 from t1;
+insert into t10 select 100000 from t1;
+analyze table t10 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t10	analyze	status	Engine-independent statistics collected
+test.t10	analyze	status	OK
+flush tables;
+set optimizer_trace=1;
+explain select * from t10  where a in (91303);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t10	ALL	NULL	NULL	NULL	NULL	9984	Using where
+# Must have selectivity_from_histogram <= 1.0:
+select json_detailed(json_extract(trace, '$**.selectivity_for_columns')) 
+from information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
+[
+    [
+        {
+            "column_name": "a",
+            "ranges": 
+            ["91303 <= a <= 91303"],
+            "selectivity_from_histogram": 0.0078125
+        }
+    ]
+]
+drop table t0,t1,t10;
 set @@global.histogram_size=@save_histogram_size;
diff --git a/mysql-test/main/selectivity.test b/mysql-test/main/selectivity.test
index 4e4513d09d6..e8f265ca51b 100644
--- a/mysql-test/main/selectivity.test
+++ b/mysql-test/main/selectivity.test
@@ -1329,6 +1329,80 @@ DROP TABLE t1;
 
 --echo # End of 10.2 tests
 
+--echo #
+--echo # MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
+--echo #
+create table t0(a int);  # t0's row count (78) is the number of rows in one histogram bucket.
+insert into t0 select 1 from seq_1_to_78;
+
+create table t1(a int);  # one-third of a bucket
+insert into t1 select 1 from seq_1_to_26;
+
+create table t10 (a int);
+insert into t10 select 0 from t0, seq_1_to_4;
+
+insert into t10 select 8693 from t1;
+insert into t10 select 8694 from t1;
+insert into t10 select 8695 from t1;
+
+
+insert into t10 select 34783 from t1;
+insert into t10 select 34784 from t1;
+insert into t10 select 34785 from t1;
+
+
+insert into t10 select 34785 from t0, seq_1_to_8;
+
+insert into t10 select 65214 from t1;
+insert into t10 select 65215 from t1;
+insert into t10 select 65216 from t1;
+
+insert into t10 select 65216 from t0, seq_1_to_52;
+
+insert into t10 select 65217 from t1;
+insert into t10 select 65218 from t1;
+insert into t10 select 65219 from t1;
+
+insert into t10 select 65219 from t0;
+
+
+insert into t10 select 73913 from t1;
+insert into t10 select 73914 from t1;
+insert into t10 select 73915 from t1;
+
+insert into t10 select 73915 from t0, seq_1_to_40;
+
+
+insert into t10 select 78257 from t1;
+insert into t10 select 78258 from t1;
+insert into t10 select 78259 from t1;
+
+insert into t10 select 91300 from t1;
+insert into t10 select 91301 from t1;
+insert into t10 select 91302 from t1;
+
+insert into t10 select 91302 from t0, seq_1_to_6;
+
+insert into t10 select 91303 from t1; # Only one-third of a bucket matches the searched value
+insert into t10 select 91304 from t1;
+insert into t10 select 91305 from t1;
+
+insert into t10 select 91305 from t0, seq_1_to_8;
+
+insert into t10 select  99998 from t1;
+insert into t10 select  99999 from t1;
+insert into t10 select 100000 from t1;
+
+analyze table t10 persistent for all;
+flush tables;
+set optimizer_trace=1;
+explain select * from t10  where a in (91303);
+--echo # Must have selectivity_from_histogram <= 1.0:
+select json_detailed(json_extract(trace, '$**.selectivity_for_columns')) 
+from information_schema.optimizer_trace;
+
+drop table t0,t1,t10;
+
 #
 # Clean up
 #
diff --git a/mysql-test/main/selectivity_no_engine.result b/mysql-test/main/selectivity_no_engine.result
index 3811b12a1be..fe8646ac5bf 100644
--- a/mysql-test/main/selectivity_no_engine.result
+++ b/mysql-test/main/selectivity_no_engine.result
@@ -36,12 +36,12 @@ test.t2	analyze	status	OK
 # The following two must have the same in 'Extra' column:
 explain extended select * from t2 where col1 IN (20, 180);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.08	Using where
 Warnings:
 Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where `test`.`t2`.`col1` in (20,180)
 explain extended select * from t2 where col1 IN (180, 20);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.08	Using where
 Warnings:
 Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where `test`.`t2`.`col1` in (180,20)
 drop table t1, t2;
@@ -102,7 +102,7 @@ test.t1	analyze	status	Engine-independent statistics collected
 test.t1	analyze	status	OK
 explain extended select * from t1 where col1 in (1,2,3);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	3.37	Using where
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	2.82	Using where
 Warnings:
 Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` in (1,2,3)
 # Must not cause fp division by zero, or produce nonsense numbers:
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index be011adb60c..5aca25d7a1b 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -3932,7 +3932,8 @@ double Histogram::point_selectivity(double pos, double avg_sel)
     /*
       So:
       - each bucket has the same #rows 
-      - values are unformly distributed across the [min_value,max_value] domain.
+      - We assume that values are uniformly distributed across the
+        [min_value,max_value] domain.
 
       If a bucket has value range that's N times bigger then average, than
       each value will have to have N times fewer rows than average.
@@ -3940,11 +3941,13 @@ double Histogram::point_selectivity(double pos, double avg_sel)
     sel= avg_sel * avg_bucket_width / current_bucket_width;
 
     /*
-      (Q: if we just follow this proportion we may end up in a situation
-      where number of different values we expect to find in this bucket
-      exceeds the number of rows that this histogram has in a bucket. Are 
-      we ok with this or we would want to have certain caps?)
+      Note that this adjustment is just a (brave?) heuristic. What we know for
+      certain is that the searched value fits into one histogram bucket, so do
+      not return an estimate larger than the selectivity of a single bucket.
     */
+    double bucket_sel= 1.0/(get_width() + 1);
+    if (sel >= bucket_sel)
+      sel= bucket_sel;
   }
   return sel;
 }