diff options
author | Alya Berciu <alya.berciu@mongodb.com> | 2022-12-16 10:06:28 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-12-16 11:00:05 +0000 |
commit | 49a208dc8950dfcc85ee65e85fb725be80dd4aba (patch) | |
tree | 79be88911e68025ea906d74a4e7afb271f8445f0 /src/mongo/db/query/ce/histogram_predicate_estimation.cpp | |
parent | 46079d2869363a651457013e18e8e10f44502ed9 (diff) | |
download | mongo-49a208dc8950dfcc85ee65e85fb725be80dd4aba.tar.gz |
SERVER-70936 Implement CE for booleans
Diffstat (limited to 'src/mongo/db/query/ce/histogram_predicate_estimation.cpp')
-rw-r--r-- | src/mongo/db/query/ce/histogram_predicate_estimation.cpp | 49 |
1 files changed, 34 insertions, 15 deletions
diff --git a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp index fca3db899f7..22452726877 100644 --- a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp +++ b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp @@ -252,25 +252,44 @@ EstimationResult estimate(const ScalarHistogram& h, /** * Returns how many values of the given type are known by the array histogram. */ -double getTypeCard(const ArrayHistogram& ah, value::TypeTags tag, bool includeScalar) { +double getTypeCard(const ArrayHistogram& ah, + value::TypeTags tag, + value::Value val, + bool includeScalar) { double count = 0.0; - // TODO SERVER-70936: booleans are estimated by different type counters (unless in arrays). - if (tag == sbe::value::TypeTags::Boolean) { - uasserted(7051101, "Cannot estimate boolean types yet with histogram CE."); + if (includeScalar) { + // Include scalar type count estimate. + switch (tag) { + case value::TypeTags::Boolean: { + // In the case of booleans, we have separate true/false counters we can use. + const bool estTrue = value::bitcastTo<bool>(val); + if (estTrue) { + count += ah.getTrueCount(); + } else { + count += ah.getFalseCount(); + } + break; + } + case value::TypeTags::Array: { + // Note that if we are asked by the optimizer to estimate an interval whose bounds + // are arrays, this means we are trying to estimate equality on nested arrays. In + // this case, we do not want to include the "scalar" type counter for the array + // type, because this will cause us to estimate the nested array case as counting + // all arrays, regardless of whether or not they are nested. + break; + } + // TODO SERVER-71377: Use both missing & null counters for null equality. + // case value::TypeTags::Null: {} + default: { count += ah.getTypeCount(tag); } + } } - // Note that if we are asked by the optimizer to estimate an interval whose bounds are arrays, - // this means we are trying to estimate equality on nested arrays. In this case, we do not want - // to include the "scalar" type counter for the array type, because this will cause us to - // estimate the nested array case as counting all arrays, regardless of whether or not they are - // nested. - if (includeScalar && tag != value::TypeTags::Array) { - count += ah.getTypeCount(tag); - } if (ah.isArray()) { + // Include array type count estimate. count += ah.getArrayTypeCount(tag); } + return count; } @@ -436,7 +455,7 @@ CEType estimateIntervalCardinality(const ArrayHistogram& ah, } // Otherwise, we return the cardinality for the type of the intervals. - return {getTypeCard(ah, tag, includeScalar)}; + return {getTypeCard(ah, tag, val, includeScalar)}; } // Otherwise, we have a range. @@ -476,9 +495,9 @@ CEType estimateIntervalCardinality(const ArrayHistogram& ah, // non-histogrammable types. Otherwise, we need to figure out which type(s) are included by this // range. if (lowTag == highTag || isIntervalSubsetOfType(interval, lowTag)) { - return {getTypeCard(ah, lowTag, includeScalar)}; + return {getTypeCard(ah, lowTag, lowVal, includeScalar)}; } else if (isIntervalSubsetOfType(interval, highTag)) { - return {getTypeCard(ah, highTag, includeScalar)}; + return {getTypeCard(ah, highTag, highVal, includeScalar)}; } // If we reach here, we've given up estimating, because our interval intersected both high & low |