summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Petrunia <psergey@askmonty.org>2021-11-26 20:03:08 +0300
committerSergei Petrunia <psergey@askmonty.org>2021-11-26 20:03:08 +0300
commitbfb9f775525c05fa1b160ed1e63c51ee53eb052c (patch)
tree1efb9c59be966db121445353fd13892d357633ba
parent71ae8a809499802e1187c07aba4e07fbf8be1b3b (diff)
downloadmariadb-git-preview-10.7-MDEV-26519-json-histograms.tar.gz
MDEV-26886: Estimation for filtered rows less precise with JSON histogrampreview-10.7-MDEV-26519-json-histograms
- Make Histogram_json_hb::range_selectivity handle singleton buckets specially when computing selectivity of the max. endpoint bound. (for min. endpoint, we already do that). - Also, fixed comments for Histogram_json_hb::find_bucket
-rw-r--r--mysql-test/main/statistics_json.result25
-rw-r--r--mysql-test/main/statistics_json.test14
-rw-r--r--sql/opt_histogram_json.cc49
-rw-r--r--sql/opt_histogram_json.h2
4 files changed, 67 insertions, 23 deletions
diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result
index 99373c93166..537233ef2c2 100644
--- a/mysql-test/main/statistics_json.result
+++ b/mysql-test/main/statistics_json.result
@@ -7658,3 +7658,28 @@ test.t1 analyze status OK
ALTER TABLE t1 MODIFY f TEXT, ORDER BY pk;
INSERT INTO t1 (f) VALUES ('bar');
DROP TABLE t1;
+#
+# MDEV-26886: Estimation for filtered rows less precise with JSON histogram
+#
+create table t1 (a tinyint) as select if(seq%3,seq,0) as a from seq_1_to_100;
+select count(*) from t1 where a <= 0;
+count(*)
+33
+set histogram_type = JSON_HB, histogram_size=default;
+analyze table t1 persistent for all;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+analyze select * from t1 where a <= 0;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 33.00 33.00 Using where
+analyze select * from t1 where a < 0;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.47 0.00 Using where
+analyze select * from t1 where a > 0;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 67.00 67.00 Using where
+analyze select * from t1 where a >= 0;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 Using where
+drop table t1;
diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test
index c0c88895f05..23e5e7395a7 100644
--- a/mysql-test/main/statistics_json.test
+++ b/mysql-test/main/statistics_json.test
@@ -340,3 +340,17 @@ ANALYZE TABLE t1 PERSISTENT FOR ALL;
ALTER TABLE t1 MODIFY f TEXT, ORDER BY pk;
INSERT INTO t1 (f) VALUES ('bar');
DROP TABLE t1;
+
+--echo #
+--echo # MDEV-26886: Estimation for filtered rows less precise with JSON histogram
+--echo #
+create table t1 (a tinyint) as select if(seq%3,seq,0) as a from seq_1_to_100;
+select count(*) from t1 where a <= 0;
+
+set histogram_type = JSON_HB, histogram_size=default;
+analyze table t1 persistent for all;
+analyze select * from t1 where a <= 0;
+analyze select * from t1 where a < 0;
+analyze select * from t1 where a > 0;
+analyze select * from t1 where a >= 0;
+drop table t1;
diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc
index 44b058761a4..e1a4efc6b28 100644
--- a/sql/opt_histogram_json.cc
+++ b/sql/opt_histogram_json.cc
@@ -743,9 +743,22 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
idx--;
}
double left_fract= get_left_fract(idx);
- double sel= position_in_interval(field, max_key, max_key_len,
- buckets[idx].start_value,
- get_end_value(idx));
+
+ double sel;
+ /* Special handling for singleton buckets */
+ if (buckets[idx].ndv == 1 && equal)
+ {
+ if (inclusive_endp)
+ sel= 1.0;
+ else
+ sel= 0.0;
+ }
+ else
+ {
+ sel= position_in_interval(field, max_key, max_key_len,
+ buckets[idx].start_value,
+ get_end_value(idx));
+ }
max= left_fract + sel * (buckets[idx].cum_fract - left_fract);
}
else
@@ -763,26 +776,18 @@ void Histogram_json_hb::serialize(Field *field)
/*
- Find the rightmost histogram bucket such that "lookup_val $GT start_value".
-
- $GT is either '>' or '>=' depending on equal_is_less parameter.
-
- @param equal_is_less Controls what to do if a histogram bound is equal to the
- lookup_val.
-
- @detail
- Possible cases:
- 1. The regular case: the value falls into some bucket.
+ @brief
+ Find the leftmost histogram bucket such that "lookup_val >= start_value".
- 2. The value is less than the minimum of the first bucket
- 3. The value is greater than the maximum of the last bucket
- In these cases we "clip" to the first/last bucket.
+ @param field Field object (used to do value comparisons)
+ @param lookup_val The lookup value in KeyTupleFormat.
+ @param equal OUT TRUE<=> the found bucket has left_bound=lookup_val
- 4. The value hits the bucket boundary. Then, we need to know whether the
- point of interest is to the left the constant, or to the right of it.
+ @return
+ The bucket index
*/
-int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
+int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val,
bool *equal)
{
int res;
@@ -806,10 +811,10 @@ int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
}
/*
- If low and high were assigned a value in the above loop, then they are not
- equal to the lookup value:
+ If low and high were assigned a value in the above loop and we got here,
+ then they are not equal to the lookup value:
- bucket[low] < lookup_val < bucket[high]
+ bucket[low].start_value < lookup_val < bucket[high].start_value
But there are two special cases: low=0 and high=last_bucket. Handle them
below.
diff --git a/sql/opt_histogram_json.h b/sql/opt_histogram_json.h
index 6771942e94e..a5499917971 100644
--- a/sql/opt_histogram_json.h
+++ b/sql/opt_histogram_json.h
@@ -124,6 +124,6 @@ public:
private:
double get_left_fract(int idx);
std::string& get_end_value(int idx);
- int find_bucket(Field *field, const uchar *lookup_val, bool *equal);
+ int find_bucket(const Field *field, const uchar *lookup_val, bool *equal);
};