summaryrefslogtreecommitdiff
path: root/sql/opt_histogram_json.cc
diff options
context:
space:
mode:
authorSergei Petrunia <sergey@mariadb.com>2022-01-08 22:36:12 +0300
committerSergei Petrunia <psergey@askmonty.org>2022-01-19 18:10:12 +0300
commit531dd708eff5821da5561daeea2f23b83c6b53ec (patch)
tree650fd49465587dc3cbe117382343b8190e8a4715 /sql/opt_histogram_json.cc
parent67d4d0426fb15c08cbc260b7b5c93a9c71ddfe16 (diff)
downloadmariadb-git-531dd708eff5821da5561daeea2f23b83c6b53ec.tar.gz
MDEV-27229: Estimation for filtered rows less precise ... #5
Fix special handling for values that are right next to buckets with ndv=1.
Diffstat (limited to 'sql/opt_histogram_json.cc')
-rw-r--r--sql/opt_histogram_json.cc139
1 files changed, 88 insertions, 51 deletions
diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc
index 2ee6cd73dbe..7c037183f41 100644
--- a/sql/opt_histogram_json.cc
+++ b/sql/opt_histogram_json.cc
@@ -910,12 +910,12 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
// If the value is outside of the histogram's range, this will "clip" it to
// first or last bucket.
- bool equal;
- int idx= find_bucket(field, key, &equal);
+ int endp_cmp;
+ int idx= find_bucket(field, key, &endp_cmp);
double sel;
- if (buckets[idx].ndv == 1 && !equal)
+ if (buckets[idx].ndv == 1 && (endp_cmp!=0))
{
/*
The bucket has a single value and it doesn't match! Return a very
@@ -979,22 +979,27 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
// Find the leftmost bucket that contains the lookup value.
// (If the lookup value is to the left of all buckets, find bucket #0)
- bool equal;
- int idx= find_bucket(field, min_key, &equal);
- if (equal && exclusive_endp && buckets[idx].ndv==1 &&
- idx < (int)buckets.size()-1)
+ int endp_cmp;
+ int idx= find_bucket(field, min_key, &endp_cmp);
+
+ double sel;
+ // Special handling for buckets with ndv=1:
+ if (buckets[idx].ndv == 1)
{
- /*
- The range is "col > $CONST" and we've found a bucket that contains
- only the value $CONST. Move to the next bucket.
- */
- idx++;
+ if (endp_cmp < 0)
+ sel= 0.0;
+ else if (endp_cmp > 0)
+ sel= 1.0;
+ else // endp_cmp == 0.0
+ sel= (exclusive_endp)? 1.0 : 0.0;
+ }
+ else
+ {
+ sel= position_in_interval(field, min_key, min_key_len,
+ buckets[idx].start_value,
+ get_end_value(idx));
}
double left_fract= get_left_fract(idx);
- double sel= position_in_interval(field, min_key, min_key_len,
- buckets[idx].start_value,
- get_end_value(idx));
-
min= left_fract + sel * (buckets[idx].cum_fract - left_fract);
}
else
@@ -1012,28 +1017,35 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
max_key++;
max_key_len--;
}
- bool equal;
- int idx= find_bucket(field, max_key, &equal);
+ int endp_cmp;
+ int idx= find_bucket(field, max_key, &endp_cmp);
- if (equal && !inclusive_endp && idx > 0)
+ if ((endp_cmp == 0) && !inclusive_endp)
{
/*
The range is "col < $CONST" and we've found a bucket starting with
- $CONST. Move to the previous bucket.
+ $CONST.
*/
- idx--;
- equal= false;
+ if (idx > 0)
+ {
+ // Move to the previous bucket
+ endp_cmp= 1;
+ idx--;
+ }
+ else
+ endp_cmp= -1;
}
- double left_fract= get_left_fract(idx);
-
double sel;
- /* Special handling for singleton buckets */
- if (buckets[idx].ndv == 1 && equal)
+
+ // Special handling for buckets with ndv=1:
+ if (buckets[idx].ndv == 1)
{
- if (inclusive_endp)
- sel= 1.0;
- else
+ if (endp_cmp < 0)
sel= 0.0;
+ else if (endp_cmp > 0)
+ sel= 1.0;
+ else // endp_cmp == 0.0
+ sel= inclusive_endp? 1.0 : 0.0;
}
else
{
@@ -1041,13 +1053,13 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
buckets[idx].start_value,
get_end_value(idx));
}
+ double left_fract= get_left_fract(idx);
max= left_fract + sel * (buckets[idx].cum_fract - left_fract);
}
else
max= 1.0;
- double sel = max - min;
- return sel;
+ return max - min;
}
@@ -1057,25 +1069,37 @@ void Histogram_json_hb::serialize(Field *field)
}
+static int SGN(int x)
+{
+ if (!x)
+ return 0;
+ return (x < 0)? -1 : 1;
+}
+
+
/*
@brief
Find the leftmost histogram bucket such that "lookup_val >= start_value".
@param field Field object (used to do value comparisons)
@param lookup_val The lookup value in KeyTupleFormat.
- @param equal OUT TRUE<=> the found bucket has left_bound=lookup_val
-
+ @param cmp OUT How the lookup_val compares to found_bucket.left_bound:
+ 0 - lookup_val == bucket.left_bound
+ >0 - lookup_val > bucket.left_bound (the most typical)
+ <0 - lookup_val < bucket.left_bound. This can only happen
+ for the first bucket, for all other buckets we would just
+ pick the previous bucket and have cmp>=0.
@return
The bucket index
*/
int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val,
- bool *equal)
+ int *cmp)
{
int res;
int low= 0;
int high= (int)buckets.size() - 1;
- *equal= false;
+ *cmp= 1; // By default, (bucket[retval].start_value < *lookup_val)
while (low + 1 < high)
{
@@ -1083,7 +1107,7 @@ int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val,
res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val);
if (!res)
{
- *equal= true;
+ *cmp= res;
low= middle;
goto end;
}
@@ -1104,31 +1128,44 @@ int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val,
*/
if (low == 0)
{
- res= field->key_cmp((uchar*)buckets[0].start_value.data(), lookup_val);
- if (!res)
- *equal= true;
- else if (res < 0) // buckets[0] < lookup_val
+ res= field->key_cmp(lookup_val, (uchar*)buckets[0].start_value.data());
+ if (res <= 0)
+ *cmp= res;
+ else // res>0, lookup_val > buckets[0].start_value
{
- res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
- if (!res)
- *equal= true;
- if (res <= 0) // buckets[high] <= lookup_val
+ res= field->key_cmp(lookup_val, (uchar*)buckets[high].start_value.data());
+ if (res >= 0) // lookup_val >= buckets[high].start_value
+ {
+ // Move to that bucket
low= high;
+ *cmp= res;
+ }
+ else
+ *cmp= 1;
}
}
else if (high == (int)buckets.size() - 1)
{
- res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
- if (!res)
- *equal= true;
- if (res <= 0)
+ res= field->key_cmp(lookup_val, (uchar*)buckets[high].start_value.data());
+ if (res >= 0)
+ {
+ // Ok the value is in the last bucket.
+ *cmp= res;
low= high;
+ }
+ else
+ {
+ // The value is in the 'low' bucket.
+ res= field->key_cmp(lookup_val, (uchar*)buckets[low].start_value.data());
+ *cmp= res;
+ }
}
end:
- // Verification: *equal==TRUE <=> lookup value is equal to the found bucket.
- DBUG_ASSERT(*equal == !(field->key_cmp((uchar*)buckets[low].start_value.data(),
- lookup_val)));
+ // Verification: *cmp has correct value
+ DBUG_ASSERT(SGN(*cmp) ==
+ SGN(field->key_cmp(lookup_val,
+ (uchar*)buckets[low].start_value.data())));
// buckets[low] <= lookup_val, with one exception of the first bucket.
DBUG_ASSERT(low == 0 ||
field->key_cmp((uchar*)buckets[low].start_value.data(), lookup_val)<= 0);