SERVER-70936 Implement CE for booleans

author: Alya Berciu <alya.berciu@mongodb.com> 2022-12-16 10:06:28 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-12-16 11:00:05 +0000
commit: 49a208dc8950dfcc85ee65e85fb725be80dd4aba (patch)
tree: 79be88911e68025ea906d74a4e7afb271f8445f0 /src/mongo/db/query/ce/histogram_predicate_estimation.cpp
parent: 46079d2869363a651457013e18e8e10f44502ed9 (diff)
download: mongo-49a208dc8950dfcc85ee65e85fb725be80dd4aba.tar.gz
1 files changed, 34 insertions, 15 deletions
diff --git a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
index fca3db899f7..22452726877 100644
--- a/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
+++ b/src/mongo/db/query/ce/histogram_predicate_estimation.cpp
@@ -252,25 +252,44 @@ EstimationResult estimate(const ScalarHistogram& h,
 /**
  * Returns how many values of the given type are known by the array histogram.
  */
-double getTypeCard(const ArrayHistogram& ah, value::TypeTags tag, bool includeScalar) {
+double getTypeCard(const ArrayHistogram& ah,
+                   value::TypeTags tag,
+                   value::Value val,
+                   bool includeScalar) {
     double count = 0.0;
 
-    // TODO SERVER-70936: booleans are estimated by different type counters (unless in arrays).
-    if (tag == sbe::value::TypeTags::Boolean) {
-        uasserted(7051101, "Cannot estimate boolean types yet with histogram CE.");
+    if (includeScalar) {
+        // Include scalar type count estimate.
+        switch (tag) {
+            case value::TypeTags::Boolean: {
+                // In the case of booleans, we have separate true/false counters we can use.
+                const bool estTrue = value::bitcastTo<bool>(val);
+                if (estTrue) {
+                    count += ah.getTrueCount();
+                } else {
+                    count += ah.getFalseCount();
+                }
+                break;
+            }
+            case value::TypeTags::Array: {
+                // Note that if we are asked by the optimizer to estimate an interval whose bounds
+                // are arrays, this means we are trying to estimate equality on nested arrays. In
+                // this case, we do not want to include the "scalar" type counter for the array
+                // type, because this will cause us to estimate the nested array case as counting
+                // all arrays, regardless of whether or not they are nested.
+                break;
+            }
+            // TODO SERVER-71377: Use both missing & null counters for null equality.
+            // case value::TypeTags::Null: {}
+            default: { count += ah.getTypeCount(tag); }
+        }
     }
 
-    // Note that if we are asked by the optimizer to estimate an interval whose bounds are  arrays,
-    // this means we are trying to estimate equality on nested arrays. In this case, we do not want
-    // to include the "scalar" type counter for the array type, because this will cause us to
-    // estimate the nested array case as counting all arrays, regardless of whether or not they are
-    // nested.
-    if (includeScalar && tag != value::TypeTags::Array) {
-        count += ah.getTypeCount(tag);
-    }
     if (ah.isArray()) {
+        // Include array type count estimate.
         count += ah.getArrayTypeCount(tag);
     }
+
     return count;
 }
 
@@ -436,7 +455,7 @@ CEType estimateIntervalCardinality(const ArrayHistogram& ah,
         }
 
         // Otherwise, we return the cardinality for the type of the intervals.
-        return {getTypeCard(ah, tag, includeScalar)};
+        return {getTypeCard(ah, tag, val, includeScalar)};
     }
 
     // Otherwise, we have a range.
@@ -476,9 +495,9 @@ CEType estimateIntervalCardinality(const ArrayHistogram& ah,
     // non-histogrammable types. Otherwise, we need to figure out which type(s) are included by this
     // range.
     if (lowTag == highTag || isIntervalSubsetOfType(interval, lowTag)) {
-        return {getTypeCard(ah, lowTag, includeScalar)};
+        return {getTypeCard(ah, lowTag, lowVal, includeScalar)};
     } else if (isIntervalSubsetOfType(interval, highTag)) {
-        return {getTypeCard(ah, highTag, includeScalar)};
+        return {getTypeCard(ah, highTag, highVal, includeScalar)};
     }
 
     // If we reach here, we've given up estimating, because our interval intersected both high & low
author	Alya Berciu <alya.berciu@mongodb.com>	2022-12-16 10:06:28 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-12-16 11:00:05 +0000
commit	49a208dc8950dfcc85ee65e85fb725be80dd4aba (patch)
tree	79be88911e68025ea906d74a4e7afb271f8445f0 /src/mongo/db/query/ce/histogram_predicate_estimation.cpp
parent	46079d2869363a651457013e18e8e10f44502ed9 (diff)
download	mongo-49a208dc8950dfcc85ee65e85fb725be80dd4aba.tar.gz