summaryrefslogtreecommitdiff
path: root/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
diff options
context:
space:
mode:
authorAlya Berciu <alya.berciu@mongodb.com>2022-12-16 10:06:28 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-12-16 11:00:05 +0000
commit49a208dc8950dfcc85ee65e85fb725be80dd4aba (patch)
tree79be88911e68025ea906d74a4e7afb271f8445f0 /src/mongo/db/query/ce/histogram_predicate_estimation.cpp
parent46079d2869363a651457013e18e8e10f44502ed9 (diff)
downloadmongo-49a208dc8950dfcc85ee65e85fb725be80dd4aba.tar.gz
SERVER-70936 Implement CE for booleans
Diffstat (limited to 'src/mongo/db/query/ce/histogram_predicate_estimation.cpp')
-rw-r--r--src/mongo/db/query/ce/histogram_predicate_estimation.cpp49
1 files changed, 34 insertions, 15 deletions
diff --git a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
index fca3db899f7..22452726877 100644
--- a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
+++ b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
@@ -252,25 +252,44 @@ EstimationResult estimate(const ScalarHistogram& h,
/**
* Returns how many values of the given type are known by the array histogram.
*/
-double getTypeCard(const ArrayHistogram& ah, value::TypeTags tag, bool includeScalar) {
+double getTypeCard(const ArrayHistogram& ah,
+ value::TypeTags tag,
+ value::Value val,
+ bool includeScalar) {
double count = 0.0;
- // TODO SERVER-70936: booleans are estimated by different type counters (unless in arrays).
- if (tag == sbe::value::TypeTags::Boolean) {
- uasserted(7051101, "Cannot estimate boolean types yet with histogram CE.");
+ if (includeScalar) {
+ // Include scalar type count estimate.
+ switch (tag) {
+ case value::TypeTags::Boolean: {
+ // In the case of booleans, we have separate true/false counters we can use.
+ const bool estTrue = value::bitcastTo<bool>(val);
+ if (estTrue) {
+ count += ah.getTrueCount();
+ } else {
+ count += ah.getFalseCount();
+ }
+ break;
+ }
+ case value::TypeTags::Array: {
+ // Note that if we are asked by the optimizer to estimate an interval whose bounds
+ // are arrays, this means we are trying to estimate equality on nested arrays. In
+ // this case, we do not want to include the "scalar" type counter for the array
+ // type, because this will cause us to estimate the nested array case as counting
+ // all arrays, regardless of whether or not they are nested.
+ break;
+ }
+ // TODO SERVER-71377: Use both missing & null counters for null equality.
+ // case value::TypeTags::Null: {}
+ default: { count += ah.getTypeCount(tag); }
+ }
}
- // Note that if we are asked by the optimizer to estimate an interval whose bounds are arrays,
- // this means we are trying to estimate equality on nested arrays. In this case, we do not want
- // to include the "scalar" type counter for the array type, because this will cause us to
- // estimate the nested array case as counting all arrays, regardless of whether or not they are
- // nested.
- if (includeScalar && tag != value::TypeTags::Array) {
- count += ah.getTypeCount(tag);
- }
if (ah.isArray()) {
+ // Include array type count estimate.
count += ah.getArrayTypeCount(tag);
}
+
return count;
}
@@ -436,7 +455,7 @@ CEType estimateIntervalCardinality(const ArrayHistogram& ah,
}
// Otherwise, we return the cardinality for the type of the intervals.
- return {getTypeCard(ah, tag, includeScalar)};
+ return {getTypeCard(ah, tag, val, includeScalar)};
}
// Otherwise, we have a range.
@@ -476,9 +495,9 @@ CEType estimateIntervalCardinality(const ArrayHistogram& ah,
// non-histogrammable types. Otherwise, we need to figure out which type(s) are included by this
// range.
if (lowTag == highTag || isIntervalSubsetOfType(interval, lowTag)) {
- return {getTypeCard(ah, lowTag, includeScalar)};
+ return {getTypeCard(ah, lowTag, lowVal, includeScalar)};
} else if (isIntervalSubsetOfType(interval, highTag)) {
- return {getTypeCard(ah, highTag, includeScalar)};
+ return {getTypeCard(ah, highTag, highVal, includeScalar)};
}
// If we reach here, we've given up estimating, because our interval intersected both high & low