SERVER-67166 Heuristic CE for SargableNode

author: Timour Katchaounov <timour.katchaounov@mongodb.com> 2022-07-19 09:57:59 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-07-19 10:30:38 +0000
commit: 4af213f08a582a90534f5f98a9cb1d0976e4945b (patch)
tree: dc6b991beacaa4c91dfe5a1d7d9874a548bf946a
parent: 0fa5c9a8075ef9d27d3d94b68876a6fbd5afd54f (diff)
download: mongo-4af213f08a582a90534f5f98a9cb1d0976e4945b.tar.gz
15 files changed, 604 insertions, 89 deletions
diff --git a/src/mongo/db/commands/cqf/cqf_aggregate.cpp b/src/mongo/db/commands/cqf/cqf_aggregate.cpp
index c51302d28a8..304ea553f69 100644
--- a/src/mongo/db/commands/cqf/cqf_aggregate.cpp
+++ b/src/mongo/db/commands/cqf/cqf_aggregate.cpp
@@ -493,7 +493,7 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> getSBEExecutorViaCascadesOp
     } else if (internalQueryCardinalityEstimatorMode == ce::kHistogram &&
                ce::CollectionStatistics::hasCollectionStatistics(nss)) {
         const auto& stats = ce::CollectionStatistics::getCollectionStatistics(nss);
-        auto ceDerivation = std::make_unique<CEHistogramTransport>(opCtx, stats);
+        auto ceDerivation = std::make_unique<CEHistogramTransport>(stats);
         OptPhaseManager phaseManager{OptPhaseManager::getAllRewritesSet(),
                                      prefixId,
                                      false /*requireRID*/,
diff --git a/src/mongo/db/query/ce/SConscript b/src/mongo/db/query/ce/SConscript
index 4941d74512d..46c8a254e75 100644
--- a/src/mongo/db/query/ce/SConscript
+++ b/src/mongo/db/query/ce/SConscript
@@ -9,7 +9,6 @@ env.Library(
     source=[
         'array_histogram.cpp',
         'ce_histogram.cpp',
-        'ce_math.cpp',
         'ce_sampling.cpp',
         'ce_estimation.cpp',
         'collection_statistics.cpp',
@@ -46,3 +45,13 @@ env.CppUnitTest(
         'ce_test_utils',
     ],
 )
+
+env.CppUnitTest(
+    target="ce_heuristic_test",
+    source=[
+        "ce_heuristic_test.cpp",
+    ],
+    LIBDEPS=[
+        'ce_test_utils',
+    ],
+)
diff --git a/src/mongo/db/query/ce/ce_heuristic_test.cpp b/src/mongo/db/query/ce/ce_heuristic_test.cpp
new file mode 100644
index 00000000000..f8a98c1001e
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_heuristic_test.cpp
@@ -0,0 +1,218 @@
+/**
+ *    Copyright (C) 2022-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#include <string>
+
+#include "mongo/db/query/ce/ce_test_utils.h"
+#include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
+#include "mongo/db/query/optimizer/cascades/cost_derivation.h"
+#include "mongo/db/query/optimizer/cascades/logical_props_derivation.h"
+#include "mongo/db/query/optimizer/cascades/memo.h"
+#include "mongo/db/query/optimizer/defs.h"
+#include "mongo/db/query/optimizer/explain.h"
+#include "mongo/db/query/optimizer/metadata.h"
+#include "mongo/db/query/optimizer/opt_phase_manager.h"
+#include "mongo/db/query/optimizer/utils/unit_test_utils.h"
+#include "mongo/db/query/optimizer/utils/utils.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo::ce {
+namespace {
+
+using namespace optimizer;
+using namespace optimizer::cascades;
+
+constexpr double kCollCard = 10000.0;
+const std::string collName = "test";
+
+class HeuristicCETester : public CETester {
+public:
+    HeuristicCETester(std::string collName) : CETester(collName, 0) {}
+
+protected:
+    std::unique_ptr<CEInterface> getCETransport() const override {
+        return std::make_unique<HeuristicCE>();
+    }
+};
+
+HeuristicCETester ht(collName);
+
+TEST(CEHeuristicTest, CEwithoutOptimization) {
+    std::string query = "{a0 : {$gt : 14, $lt : 21}}";
+    ht.setCollCard(kCollCard);
+    double ce = ht.getCE(query, 0);
+    ASSERT_APPROX_EQUAL(100.0, ce, kMaxCEError);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Eq) {
+    std::string query = "{a : 123}";
+    ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 1.73205, 3.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 2.64575, 7.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 10.0, 100.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 100.0, 10000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Gt) {
+    std::string query = "{a: {$gt: 44}}";
+    ASSERT_MATCH_CE_CARD(ht, query, 0.01, 0.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 0.7, 1.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 330, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Gt_Lt) {
+    std::string query = "{a: {$gt: 44, $lt: 99}}";
+    ASSERT_MATCH_CE_CARD(ht, query, 0.585662, 1.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 5.27096, 9.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 29.885, 99.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 189.571, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_AND2Eq) {
+    std::string query = "{a : 13, b : 42}";
+    ASSERT_MATCH_CE_CARD(ht, query, 1.31607, 3.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 1.62658, 7.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 1.77828, 10.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 100.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 10.0, 10000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_AND3Eq) {
+    std::string query = "{a : 13, b : 42, c : 69}";
+    ASSERT_MATCH_CE_CARD(ht, query, 1.1472, 3.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 1.27537, 7.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 1.33352, 10.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 1.77828, 100.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_OR1path) {
+    std::string query = "{$or: [{a0: {$gt: 44}}, {a0: {$lt: 9}}]}";
+    ASSERT_MATCH_CE_CARD(ht, query, 7.52115, 9.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 58.6188, 99.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 451.581, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_OR2paths) {
+    std::string query = "{$or: [{a0: {$gt:44}}, {b0: {$lt: 9}}]}";
+    // TODO: Disjunctions on different paths are not SARGable, and are represented as a single
+    // FilterNode. FilterNode is currenlty estimated to be always 10% selectivity.
+    ASSERT_MATCH_CE_CARD(ht, query, 0.9, 9.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 9.9, 99.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 100.0, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathSimple) {
+    std::string query =
+        "{$or: ["
+        "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]},"
+        "{$and: [{a0: {$gt:40}}, {a0: {$lt: 44}}]}"
+        "]}";
+    ASSERT_MATCH_CE_CARD(ht, query, 6.59965, 9.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 41.2515, 99.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 270.42, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathComplex) {
+    // Each disjunct has different number of conjuncts, so that its selectivity is different.
+    // We need 5 disjuncts to test exponential backoff which cuts off at the first 4.
+    // The conjuncts are in selectivity order.
+    std::string query1 =
+        "{$or: ["
+        "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]},"
+        "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}, {a0: {$gt: 42}}]},"
+        "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}]},"
+        "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: "
+        "81}}]},"
+        "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: "
+        "81}}, {a0: {$lt: 77}}]}"
+        "]}";
+    auto ce1 = ht.getCE(query1);
+    // The conjuncts are in inverse selectivity order.
+    std::string query2 =
+        "{$or: ["
+        "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: "
+        "81}}, {a0: {$lt: 77}}]},"
+        "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: "
+        "81}}]},"
+        "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}]},"
+        "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}, {a0: {$gt: 42}}]},"
+        "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}"
+        "]}";
+    auto ce2 = ht.getCE(query2);
+    ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF2paths) {
+    // TODO: Disjunctions on different paths are not SARGable, and are represented as a single
+    // FilterNode. FilterNode is currently estimated to be always 10% selectivity.
+    std::string query =
+        "{$or: ["
+        "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]},"
+        "{$and: [{b0: {$gt:40}}, {b0: {$lt: 44}}]}"
+        "]}";
+    ASSERT_MATCH_CE_CARD(ht, query, 0.9, 9.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 9.9, 99.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 100.0, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_CNF1path) {
+    std::string query =
+        "{$and : ["
+        "{$or : [ {a0 : {$gt : 11}}, {a0 : {$lt : 44}} ]},"
+        "{$or : [ {a0 : {$gt : 77}}, {a0 : {$eq : 51}} ]}"
+        "]}";
+    ASSERT_MATCH_CE_CARD(ht, query, 6.87663, 9.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 42.7435, 99.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 275.419, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_CNF2paths) {
+    std::string query =
+        "{$and : ["
+        "{$or : [ {a0 : {$gt : 11}}, {a0 : {$lt : 44}} ]},"
+        "{$or : [ {b0 : {$gt : 77}}, {b0 : {$eq : 51}} ]}"
+        "]}";
+    ASSERT_MATCH_CE_CARD(ht, query, 6.21212, 9.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 36.4418, 99.0);
+    ASSERT_MATCH_CE_CARD(ht, query, 228.935, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionExplorationPhases) {
+    std::string query = "{a : 13, b : 42}";
+    ht.setCollCard(kCollCard);
+    double ce = ht.getCE(query, 2);
+    ASSERT_APPROX_EQUAL(10.0, ce, kMaxCEError);
+}
+
+}  // namespace
+}  // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_histogram.cpp b/src/mongo/db/query/ce/ce_histogram.cpp
index 1a04b740333..7ce299b4707 100644
--- a/src/mongo/db/query/ce/ce_histogram.cpp
+++ b/src/mongo/db/query/ce/ce_histogram.cpp
@@ -31,11 +31,11 @@
 
 #include "mongo/db/query/ce/ce_estimation.h"
 #include "mongo/db/query/ce/ce_histogram.h"
-#include "mongo/db/query/ce/ce_math.h"
 #include "mongo/db/query/ce/collection_statistics.h"
 
 #include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
 #include "mongo/db/query/optimizer/utils/abt_hash.h"
+#include "mongo/db/query/optimizer/utils/ce_math.h"
 #include "mongo/db/query/optimizer/utils/memo_utils.h"
 
 namespace mongo::optimizer::cascades {
@@ -44,8 +44,8 @@ using namespace properties;
 
 class CEHistogramTransportImpl {
 public:
-    CEHistogramTransportImpl(OperationContext* opCtx, const ce::CollectionStatistics& stats)
-        : _opCtx(opCtx), _heuristicCE(), _stats(stats) {}
+    CEHistogramTransportImpl(const ce::CollectionStatistics& stats)
+        : _heuristicCE(), _stats(stats) {}
 
     ~CEHistogramTransportImpl() {}
 
@@ -101,16 +101,16 @@ public:
                     conjSelectivities.push_back(cardinality / _stats.getCardinality());
                 }
 
-                auto backoff = ce::conjExponentialBackoff(conjSelectivities);
+                auto backoff = ce::conjExponentialBackoff(std::move(conjSelectivities));
                 disjSelectivities.push_back(backoff);
             }
 
-            auto backoff = ce::disjExponentialBackoff(disjSelectivities);
+            auto backoff = ce::disjExponentialBackoff(std::move(disjSelectivities));
             topLevelSelectivities.push_back(backoff);
         }
 
         // The elements of the PartialSchemaRequirements map represent an implicit conjunction.
-        auto backoff = ce::conjExponentialBackoff(topLevelSelectivities);
+        auto backoff = ce::conjExponentialBackoff(std::move(topLevelSelectivities));
         return backoff * childResult;
     }
 
@@ -140,14 +140,12 @@ public:
     }
 
 private:
-    OperationContext* _opCtx;
     HeuristicCE _heuristicCE;
     const ce::CollectionStatistics& _stats;
 };
 
-CEHistogramTransport::CEHistogramTransport(OperationContext* opCtx,
-                                           const ce::CollectionStatistics& stats)
-    : _impl(std::make_unique<CEHistogramTransportImpl>(opCtx, stats)) {}
+CEHistogramTransport::CEHistogramTransport(const ce::CollectionStatistics& stats)
+    : _impl(std::make_unique<CEHistogramTransportImpl>(stats)) {}
 
 CEHistogramTransport::~CEHistogramTransport() {}
 
diff --git a/src/mongo/db/query/ce/ce_histogram.h b/src/mongo/db/query/ce/ce_histogram.h
index 6132cc19339..dfc556ebf87 100644
--- a/src/mongo/db/query/ce/ce_histogram.h
+++ b/src/mongo/db/query/ce/ce_histogram.h
@@ -38,7 +38,7 @@ class CEHistogramTransportImpl;
 
 class CEHistogramTransport : public CEInterface {
 public:
-    CEHistogramTransport(OperationContext* opCtx, const ce::CollectionStatistics& stats);
+    CEHistogramTransport(const ce::CollectionStatistics& stats);
     ~CEHistogramTransport();
 
     CEType deriveCE(const Memo& memo,
diff --git a/src/mongo/db/query/ce/ce_histogram_test.cpp b/src/mongo/db/query/ce/ce_histogram_test.cpp
index 76a32ef5fa0..3be39685e83 100644
--- a/src/mongo/db/query/ce/ce_histogram_test.cpp
+++ b/src/mongo/db/query/ce/ce_histogram_test.cpp
@@ -27,6 +27,8 @@
  *    it in the license file.
  */
 
+#include <iostream>
+
 #include "mongo/db/query/ce/ce_estimation.h"
 #include "mongo/db/query/ce/ce_histogram.h"
 #include "mongo/db/query/ce/ce_test_utils.h"
@@ -45,8 +47,8 @@ public:
         : CETester(collName, numRecords), _stats{stats} {}
 
 protected:
-    std::unique_ptr<CEInterface> getCETransport(OperationContext* opCtx) override {
-        return std::make_unique<CEHistogramTransport>(opCtx, _stats);
+    std::unique_ptr<CEInterface> getCETransport() const override {
+        return std::make_unique<CEHistogramTransport>(_stats);
     }
 
 private:
@@ -118,7 +120,7 @@ TEST(CEHistogramTest, AssertSmallMaxDiffHistogramEstimatesAtomicPredicates) {
     ASSERT_MATCH_CE(t, "{a: {$eq: \"foo\"}}", 0.0);
 
     // Test case when field doesn't match fieldpath of histogram. This falls back to heuristics.
-    ASSERT_MATCH_CE(t, "{b: {$eq: 1}}", 0.8);
+    ASSERT_MATCH_CE(t, "{b: {$eq: 1}}", 2.82843);
 
     // Test $gt.
     ASSERT_MATCH_CE(t, "{a: {$gt: 3}}", 0.0);
@@ -201,8 +203,8 @@ TEST(CEHistogramTest, AssertSmallHistogramEstimatesComplexPredicates) {
 
     // Test conjunctions over multiple fields for which we may not have histograms. This falls back
     // to heuristic estimation.
-    ASSERT_MATCH_CE(t, "{a: {$eq: 2}, c: {$eq: 1}}", 0.09);
-    ASSERT_MATCH_CE(t, "{c: {$eq: 2}, d: {$eq: 22}}", 0.09);
+    ASSERT_MATCH_CE(t, "{a: {$eq: 2}, c: {$eq: 1}}", 1.73205);
+    ASSERT_MATCH_CE(t, "{c: {$eq: 2}, d: {$eq: 22}}", 1.73205);
 }
 
 TEST(CEHistogramTest, SanityTestEmptyHistogram) {
diff --git a/src/mongo/db/query/ce/ce_test_utils.cpp b/src/mongo/db/query/ce/ce_test_utils.cpp
index eb7824eb483..b35e36928dc 100644
--- a/src/mongo/db/query/ce/ce_test_utils.cpp
+++ b/src/mongo/db/query/ce/ce_test_utils.cpp
@@ -27,9 +27,12 @@
  *    it in the license file.
  */
 
+#include <cstddef>
+
 #include "mongo/db/query/ce/ce_test_utils.h"
 
 #include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
+#include "mongo/db/query/optimizer/cascades/cost_derivation.h"
 #include "mongo/db/query/optimizer/cascades/logical_props_derivation.h"
 #include "mongo/db/query/optimizer/explain.h"
 #include "mongo/db/query/optimizer/opt_phase_manager.h"
@@ -41,27 +44,31 @@ namespace mongo::ce {
 using namespace optimizer;
 using namespace cascades;
 
-CETester::CETester(std::string collName, double numRecords)
-    : _collName(std::move(collName)), _numRecords(std::move(numRecords)) {}
-
-double CETester::getCE(const std::string& query) {
-    // Mock opCtx for test.
-    QueryTestServiceContext serviceContext;
-    auto opCtx = serviceContext.makeOperationContext();
+CETester::CETester(std::string collName, double collCard)
+    : _collName(std::move(collName)), _collCard(collCard) {}
 
+double CETester::getCE(const std::string& query, size_t optimizationLevel) const {
     // Mock memo.
-    ScanDefinition sd({}, {}, {DistributionType::Centralized}, true, _numRecords);
+    ScanDefinition sd({}, {}, {DistributionType::Centralized}, true, _collCard);
     Metadata metadata({{_collName, sd}});
-    Memo memo(DebugInfo::kDefaultForTests,
-              metadata,
-              std::make_unique<DefaultLogicalPropsDerivation>(),
-              std::make_unique<HeuristicCE>());
 
-    // Construct placeholder PhaseManager.
+    std::vector<OptPhaseManager::OptPhase> optPhaseChoices{
+        OptPhaseManager::OptPhase::MemoSubstitutionPhase,
+        OptPhaseManager::OptPhase::MemoExplorationPhase};
+    optimizationLevel = std::min(optimizationLevel, optPhaseChoices.size());
+    OptPhaseManager::PhaseSet optPhases;
+    for (size_t i = 0; i < optimizationLevel; ++i) {
+        optPhases.insert(optPhaseChoices[i]);
+    }
+
+    // Construct placeholder PhaseManager. Notice that it also creates a Memo internally.
     PrefixId prefixId;
-    OptPhaseManager phaseManager({OptPhaseManager::OptPhase::MemoSubstitutionPhase},
+    OptPhaseManager phaseManager(optPhases,
                                  prefixId,
-                                 {{{_collName, {{}, {}}}}},
+                                 true /*requireRID*/,
+                                 metadata,
+                                 std::make_unique<HeuristicCE>(),
+                                 std::make_unique<DefaultCosting>(),
                                  DebugInfo::kDefaultForTests);
 
     // Construct ABT from pipeline and optimize.
@@ -69,8 +76,15 @@ double CETester::getCE(const std::string& query) {
     ASSERT_TRUE(phaseManager.optimize(abt));
 
     // Get cardinality estimate.
-    auto cht = getCETransport(opCtx.get());
-    return cht->deriveCE(memo, {}, abt.ref());
+    auto cht = getCETransport();
+    auto ce = cht->deriveCE(phaseManager.getMemo(), {}, abt.ref());
+
+#ifdef CE_TEST_LOG_MODE
+    std::cout << "Query: " << query << ", card: " << _collCard << ", Estimated: " << ce
+              << std::endl;
+#endif
+
+    return ce;
 }
 
 }  // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_test_utils.h b/src/mongo/db/query/ce/ce_test_utils.h
index e6fc660e27b..4d21e834a4e 100644
--- a/src/mongo/db/query/ce/ce_test_utils.h
+++ b/src/mongo/db/query/ce/ce_test_utils.h
@@ -29,6 +29,9 @@
 
 #pragma once
 
+#include <cstddef>
+#include <sys/types.h>
+
 #include "mongo/db/query/optimizer/cascades/interfaces.h"
 
 namespace mongo {
@@ -44,14 +47,35 @@ class CEInterface;
 
 namespace ce {
 
+// Enable this flag to log all estimates, and let all tests pass.
+//#define CE_TEST_LOG_MODE 1
+
 const double kMaxCEError = 0.01;
 
 /**
- * A helpful macro for asserting that the CE of a $match predicate is approximately what we were
+ * Helpful macros for asserting that the CE of a $match predicate is approximately what we were
  * expecting.
  */
+
+#ifndef CE_TEST_LOG_MODE
 #define ASSERT_MATCH_CE(ce, predicate, expectedCE) \
     ASSERT_APPROX_EQUAL(expectedCE, ce.getCE(predicate), kMaxCEError)
+#else
+#define ASSERT_MATCH_CE(ce, predicate, expectedCE) \
+    ce.getCE(predicate);                           \
+    ASSERT_APPROX_EQUAL(1.0, 1.0, kMaxCEError)
+#endif
+
+#ifndef CE_TEST_LOG_MODE
+#define ASSERT_MATCH_CE_CARD(ce, predicate, expectedCE, collCard) \
+    ce.setCollCard(collCard);                                     \
+    ASSERT_APPROX_EQUAL(expectedCE, ce.getCE(predicate), kMaxCEError)
+#else
+#define ASSERT_MATCH_CE_CARD(ce, predicate, expectedCE, collCard) \
+    ce.setCollCard(collCard);                                     \
+    ce.getCE(predicate);                                          \
+    ASSERT_APPROX_EQUAL(1.0, 1.0, kMaxCEError)
+#endif
 
 /**
  * A test utility class for helping verify the cardinality of CE transports on a given $match
@@ -64,19 +88,22 @@ public:
     /**
      * Returns the estimated cardinality of a given 'matchPredicate'.
      */
-    double getCE(const std::string& matchPredicate);
+    double getCE(const std::string& matchPredicate, size_t optimizationLevel = 1) const;
+
+    void setCollCard(double card) {
+        _collCard = card;
+    }
 
 protected:
     /**
      * Subclasses need to override this method to initialize the transports they are testing.
      */
-    virtual std::unique_ptr<optimizer::cascades::CEInterface> getCETransport(
-        OperationContext* opCtx) = 0;
+    virtual std::unique_ptr<optimizer::cascades::CEInterface> getCETransport() const = 0;
 
     std::string _collName;
 
     // The number of records in the collection we are testing.
-    double _numRecords;
+    double _collCard;
 };
 
 }  // namespace ce
diff --git a/src/mongo/db/query/optimizer/SConscript b/src/mongo/db/query/optimizer/SConscript
index 02b3b90bcd0..c050a0d79e7 100644
--- a/src/mongo/db/query/optimizer/SConscript
+++ b/src/mongo/db/query/optimizer/SConscript
@@ -39,6 +39,7 @@ env.Library(
         "rewrites/path_lower.cpp",
         "syntax/expr.cpp",
         "utils/abt_hash.cpp",
+        'utils/ce_math.cpp',
         "utils/interval_utils.cpp",
         "utils/memo_utils.cpp",
         "utils/utils.cpp",
diff --git a/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp b/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp
index 6d89cd1a436..802c1d24892 100644
--- a/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp
+++ b/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp
@@ -28,18 +28,26 @@
  */
 
 #include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
-#include "mongo/db/query/optimizer/utils/memo_utils.h"
+
+#include "mongo/db/query/optimizer/cascades/memo.h"
+#include "mongo/db/query/optimizer/utils/ce_math.h"
+#include "mongo/util/assert_util.h"
 
 namespace mongo::optimizer::cascades {
 
 using namespace properties;
+using namespace mongo::ce;
+
+// Invalid estimate - an arbitrary negative value used for initialization.
+constexpr SelectivityType kInvalidSel = -1.0;
+constexpr SelectivityType kDefaultFilterSel = 0.1;
 
 class CEHeuristicTransport {
 public:
     CEType transport(const ScanNode& node, CEType /*bindResult*/) {
         // Default cardinality estimate.
         const CEType metadataCE = _memo.getMetadata()._scanDefs.at(node.getScanDefName()).getCE();
-        return (metadataCE < 0.0) ? 1000.00 : metadataCE;
+        return (metadataCE < 0.0) ? kDefaultCard : metadataCE;
     }
 
     CEType transport(const ValueScanNode& node, CEType /*bindResult*/) {
@@ -53,6 +61,9 @@ public:
     }
 
     CEType transport(const FilterNode& node, CEType childResult, CEType /*exprResult*/) {
+        if (childResult == 0.0) {
+            return 0.0;
+        }
         if (node.getFilter() == Constant::boolean(true)) {
             // Trivially true filter.
             return childResult;
@@ -61,7 +72,7 @@ public:
             return 0.0;
         } else {
             // Estimate filter selectivity at 0.1.
-            return 0.1 * childResult;
+            return std::max(kDefaultFilterSel * childResult, kMinCard);
         }
     }
 
@@ -70,16 +81,120 @@ public:
         return childResult;
     }
 
+    /**
+     * Default selectivity of equalities. To avoid super small selectivities for small
+     * cardinalities, that would result in 0 cardinality for many small inputs, the
+     * estimate is scaled as inputCard grows. The bigger inputCard, the smaller the
+     * selectivity.
+     */
+    SelectivityType equalitySel(const CEType inputCard) {
+        uassert(6716604, "Zero cardinality must be handled by the caller.", inputCard > 0.0);
+        return std::sqrt(inputCard) / inputCard;
+    }
+
+    /**
+     * Default selectivity of intervals with bounds on both ends. These intervals are
+     * considered less selective than equalities.
+     * Examples: (a > 'abc' AND a < 'hta'), (0 < b <= 13)
+     */
+    SelectivityType closedRangeSel(const CEType inputCard) {
+        CEType sel = kInvalidSel;
+        if (inputCard < 20.0) {
+            sel = 0.50;
+        } else if (inputCard < 100.0) {
+            sel = 0.33;
+        } else {
+            sel = 0.20;
+        }
+        return sel;
+    }
+
+    /**
+     * Default selectivity of intervals open on one end. These intervals are
+     * considered less selective than those with both ends specified by the user query.
+     * Examples: (a > 'xyz'), (b <= 13)
+     */
+    SelectivityType openRangeSel(const CEType inputCard) {
+        SelectivityType sel = kInvalidSel;
+        if (inputCard < 20.0) {
+            sel = 0.70;
+        } else if (inputCard < 100.0) {
+            sel = 0.45;
+        } else {
+            sel = 0.33;
+        }
+        return sel;
+    }
+
+    mongo::sbe::value::TypeTags boundType(const BoundRequirement& bound) {
+        const auto constBoundPtr = bound.getBound().cast<Constant>();
+        if (constBoundPtr == nullptr) {
+            return mongo::sbe::value::TypeTags::Nothing;
+        }
+        const auto [tag, val] = constBoundPtr->get();
+        return tag;
+    }
+
+    SelectivityType intervalSel(const IntervalRequirement& interval, const CEType inputCard) {
+        SelectivityType sel = kInvalidSel;
+        if (interval.isFullyOpen()) {
+            sel = 1.0;
+        } else if (interval.isEquality()) {
+            sel = equalitySel(inputCard);
+        } else if (interval.getHighBound().isPlusInf() || interval.getLowBound().isMinusInf() ||
+                   boundType(interval.getLowBound()) != boundType(interval.getHighBound())) {
+            // The interval has an actual bound only on one of it ends if:
+            // - one of the bounds is infinite, or
+            // - both bounds are of a different type - this is the case when due to type bracketing
+            //   one of the bounds is the lowest/highest value of the previous/next type.
+            // TODO: Notice that sometimes type bracketing uses a min/max value from the same type,
+            // so sometimes we may not detect an open-ended interval.
+            sel = openRangeSel(inputCard);
+        } else {
+            sel = closedRangeSel(inputCard);
+        }
+        uassert(6716603, "Invalid selectivity.", validSelectivity(sel));
+        return sel;
+    }
+
     CEType transport(const SargableNode& node,
-                     CEType /*childResult*/,
+                     CEType childResult,
                      CEType /*bindsResult*/,
                      CEType /*refsResult*/) {
-        ABT lowered = node.getChild();
+        // Early out and return 0 since we don't expect to get more results.
+        if (childResult == 0.0) {
+            return 0.0;
+        }
+
+        SelectivityType topLevelSel = 1.0;
+        std::vector<SelectivityType> topLevelSelectivities;
         for (const auto& [key, req] : node.getReqMap()) {
-            // TODO: consider issuing one filter node per interval.
-            lowerPartialSchemaRequirement(key, req, lowered);
+            SelectivityType disjSel = 1.0;
+            std::vector<SelectivityType> disjSelectivities;
+            // Intervals are in DNF.
+            const auto intervalDNF = req.getIntervals();
+            const auto disjuncts = intervalDNF.cast<IntervalReqExpr::Disjunction>()->nodes();
+            for (const auto& disjunct : disjuncts) {
+                const auto& conjuncts = disjunct.cast<IntervalReqExpr::Conjunction>()->nodes();
+                SelectivityType conjSel = 1.0;
+                std::vector<SelectivityType> conjSelectivities;
+                for (const auto& conjunct : conjuncts) {
+                    const auto& interval = conjunct.cast<IntervalReqExpr::Atom>()->getExpr();
+                    const SelectivityType sel = intervalSel(interval, childResult);
+                    conjSelectivities.push_back(sel);
+                }
+                conjSel = ce::conjExponentialBackoff(std::move(conjSelectivities));
+                disjSelectivities.push_back(conjSel);
+            }
+            disjSel = ce::disjExponentialBackoff(std::move(disjSelectivities));
+            topLevelSelectivities.push_back(disjSel);
         }
-        return algebra::transport<false>(lowered, *this);
+
+        // The elements of the PartialSchemaRequirements map represent an implicit conjunction.
+        topLevelSel = ce::conjExponentialBackoff(std::move(topLevelSelectivities));
+        CEType card = std::max(topLevelSel * childResult, kMinCard);
+        uassert(6716602, "Invalid cardinality.", mongo::ce::validCardinality(card));
+        return card;
     }
 
     CEType transport(const RIDIntersectNode& node,
@@ -96,7 +211,7 @@ public:
                      CEType /*exprResult*/) {
         const auto& filter = node.getFilter();
 
-        double selectivity = 0.1;
+        SelectivityType selectivity = kDefaultFilterSel;
         if (filter == Constant::boolean(false)) {
             selectivity = 0.0;
         } else if (filter == Constant::boolean(true)) {
@@ -194,7 +309,8 @@ private:
 CEType HeuristicCE::deriveCE(const Memo& memo,
                              const LogicalProps& /*logicalProps*/,
                              const ABT::reference_type logicalNodeRef) const {
-    return CEHeuristicTransport::derive(memo, logicalNodeRef);
+    CEType card = CEHeuristicTransport::derive(memo, logicalNodeRef);
+    return card;
 }
 
 }  // namespace mongo::optimizer::cascades
diff --git a/src/mongo/db/query/optimizer/interval_intersection_test.cpp b/src/mongo/db/query/optimizer/interval_intersection_test.cpp
index a965e2600e2..33c2d5c4b5b 100644
--- a/src/mongo/db/query/optimizer/interval_intersection_test.cpp
+++ b/src/mongo/db/query/optimizer/interval_intersection_test.cpp
@@ -114,8 +114,8 @@ TEST(IntervalIntersection, SingleFieldIntersection) {
          "    BindBlock:\n"
          "        [rid_0]\n"
          "            Source []\n"},
-        {"{$or: [{$and: [{a0: {$gt:9}}, {a0: {$lt: 12}}]}, {$and: [{a0: {$gt:40}}, {a0: {$lt: "
-         "44}}]}]}",
+        {"{$or: [{$and: [{a0: {$gt:9, $lt:999}}, {a0: {$gt: 0, $lt: 12}}]},"
+         "       {$and: [{a0: {$gt:40, $lt:997}}, {a0: {$gt:0, $lt: 44}}]}]}",
          "Root []\n"
          "|   |   projections: \n"
          "|   |       scan_0\n"
diff --git a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
index a1468c7e8e7..6d0708d0703 100644
--- a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
@@ -1440,12 +1440,12 @@ TEST(LogicalRewriter, SargableCE) {
         "    groupId: 1\n"
         "    |   |   Logical properties:\n"
         "    |   |       cardinalityEstimate: \n"
-        "    |   |           ce: 10\n"
+        "    |   |           ce: 5.62341\n"
         "    |   |           requirementCEs: \n"
         "    |   |               refProjection: ptest, path: 'PathGet [a] PathIdentity []', ce: "
-        "100\n"
+        "31.6228\n"
         "    |   |               refProjection: ptest, path: 'PathGet [b] PathIdentity []', ce: "
-        "100\n"
+        "31.6228\n"
         "    |   |       projections: \n"
         "    |   |           ptest\n"
         "    |   |       indexingAvailability: \n"
@@ -1473,7 +1473,7 @@ TEST(LogicalRewriter, SargableCE) {
         "    groupId: 2\n"
         "    |   |   Logical properties:\n"
         "    |   |       cardinalityEstimate: \n"
-        "    |   |           ce: 10\n"
+        "    |   |           ce: 5.62341\n"
         "    |   |       projections: \n"
         "    |   |           ptest\n"
         "    |   |       indexingAvailability: \n"
diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
index 6f0a2654909..fc7cbf49485 100644
--- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
@@ -40,6 +40,9 @@
 namespace mongo::optimizer {
 namespace {
 
+// Default selectivity of predicates used by HintedCE to force certain plans.
+constexpr double kDefaultSelectivity = 0.1;
+
 TEST(PhysRewriter, PhysicalRewriterBasic) {
     using namespace properties;
     PrefixId prefixId;
@@ -1189,6 +1192,17 @@ TEST(PhysRewriter, FilterIndexing4) {
 
     ABT scanNode = make<ScanNode>("root", "c1");
 
+    // TODO: SERVER-68006 will investigate why the plan changes without the hints.
+    PartialSchemaSelHints hints;
+    hints.emplace(PartialSchemaKey{"root", make<PathGet>("a", make<PathIdentity>())},
+                  kDefaultSelectivity);
+    hints.emplace(PartialSchemaKey{"root", make<PathGet>("b", make<PathIdentity>())},
+                  kDefaultSelectivity);
+    hints.emplace(PartialSchemaKey{"root", make<PathGet>("c", make<PathIdentity>())},
+                  kDefaultSelectivity);
+    hints.emplace(PartialSchemaKey{"root", make<PathGet>("d", make<PathIdentity>())},
+                  kDefaultSelectivity);
+
     ABT evalNode = make<EvaluationNode>(
         "pa",
         make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>("root")),
@@ -1232,6 +1246,7 @@ TEST(PhysRewriter, FilterIndexing4) {
          OptPhaseManager::OptPhase::MemoExplorationPhase,
          OptPhaseManager::OptPhase::MemoImplementationPhase},
         prefixId,
+        false /*requireRID*/,
         {{{"c1",
            ScanDefinition{
                {},
@@ -1243,6 +1258,8 @@ TEST(PhysRewriter, FilterIndexing4) {
                                  false /*isMultiKey*/,
                                  {DistributionType::Centralized},
                                  {}}}}}}}},
+        std::make_unique<HintedCE>(std::move(hints)),
+        std::make_unique<DefaultCosting>(),
         {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
 
     ABT optimized = std::move(rootNode);
@@ -1251,7 +1268,7 @@ TEST(PhysRewriter, FilterIndexing4) {
     phaseManager.getHints()._disableHashJoinRIDIntersect = true;
 
     ASSERT_TRUE(phaseManager.optimize(optimized));
-    ASSERT_BETWEEN(65, 80, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+    ASSERT_BETWEEN(65, 110, phaseManager.getMemo().getStats()._physPlanExplorationCount);
 
     ASSERT_EXPLAIN_V2(
         "Root []\n"
@@ -1342,7 +1359,7 @@ TEST(PhysRewriter, FilterIndexing5) {
 
     ABT optimized = std::move(rootNode);
     ASSERT_TRUE(phaseManager.optimize(optimized));
-    ASSERT_BETWEEN(25, 55, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+    ASSERT_BETWEEN(25, 70, phaseManager.getMemo().getStats()._physPlanExplorationCount);
 
     // We can cover both fields with the index, and need separate sort on "b".
     ASSERT_EXPLAIN_V2(
@@ -1502,7 +1519,7 @@ TEST(PhysRewriter, FilterIndexingStress) {
 
     // Without the changes to restrict SargableNode split to which this test is tied, we would
     // be exploring 2^kFilterCount plans, one for each created group.
-    ASSERT_EQ(51, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+    ASSERT_EQ(55, phaseManager.getMemo().getStats()._physPlanExplorationCount);
 
     const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized);
     ASSERT_BSON_PATH("\"Filter\"", explainRoot, "child.nodeType");
@@ -1554,6 +1571,14 @@ TEST(PhysRewriter, FilterIndexingVariable) {
 
     ABT scanNode = make<ScanNode>("root", "c1");
 
+    // TODO: SERVER-68006 will investigate why the plan changes without the hints.
+    PartialSchemaSelHints hints;
+    hints.emplace(PartialSchemaKey{"root",
+                                   make<PathGet>("a",
+                                                 make<PathTraverse>(make<PathIdentity>(),
+                                                                    PathTraverse::kSingleLevel))},
+                  kDefaultSelectivity);
+
     // Encode a condition using two query parameters (expressed as functions):
     // "a" > param_0 AND "a" >= param_1 (observe param_1 comparison is inclusive).
     ABT filterNode = make<FilterNode>(
@@ -1575,11 +1600,14 @@ TEST(PhysRewriter, FilterIndexingVariable) {
          OptPhaseManager::OptPhase::MemoExplorationPhase,
          OptPhaseManager::OptPhase::MemoImplementationPhase},
         prefixId,
+        false /*requireRID*/,
         {{{"c1",
            ScanDefinition{
                {},
                {{"index1",
                  makeIndexDefinition("a", CollationOp::Ascending, false /*isMultiKey*/)}}}}}},
+        std::make_unique<HintedCE>(std::move(hints)),
+        std::make_unique<DefaultCosting>(),
         {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
 
     ABT optimized = std::move(rootNode);
@@ -1724,7 +1752,7 @@ TEST(PhysRewriter, FilterReorder) {
                              make<PathGet>(projName,
                                            make<PathTraverse>(make<PathIdentity>(),
                                                               PathTraverse::kSingleLevel))},
-            0.1 * (kFilterCount - i));
+            kDefaultSelectivity * (kFilterCount - i));
         result = make<FilterNode>(
             make<EvalFilter>(make<PathGet>(std::move(projName),
                                            make<PathTraverse>(make<PathCompare>(Operations::Eq,
@@ -2086,6 +2114,12 @@ TEST(PhysRewriter, MultiKeyIndex) {
     using namespace properties;
     PrefixId prefixId;
 
+    PartialSchemaSelHints hints;
+    hints.emplace(PartialSchemaKey{"root", make<PathGet>("a", make<PathIdentity>())},
+                  kDefaultSelectivity);
+    hints.emplace(PartialSchemaKey{"root", make<PathGet>("b", make<PathIdentity>())},
+                  kDefaultSelectivity);
+
     ABT scanNode = make<ScanNode>("root", "c1");
 
     ABT evalANode = make<EvaluationNode>(
@@ -2120,12 +2154,15 @@ TEST(PhysRewriter, MultiKeyIndex) {
          OptPhaseManager::OptPhase::MemoExplorationPhase,
          OptPhaseManager::OptPhase::MemoImplementationPhase},
         prefixId,
+        false /*requireRID*/,
         {{{"c1",
            ScanDefinition{
                {},
                {{"index1", makeIndexDefinition("a", CollationOp::Ascending, false /*isMultiKey*/)},
                 {"index2",
                  makeIndexDefinition("b", CollationOp::Descending, false /*isMultiKey*/)}}}}}},
+        std::make_unique<HintedCE>(std::move(hints)),
+        std::make_unique<DefaultCosting>(),
         {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
 
     {
@@ -2135,7 +2172,7 @@ TEST(PhysRewriter, MultiKeyIndex) {
         phaseManager.getHints()._disableHashJoinRIDIntersect = true;
 
         ASSERT_TRUE(phaseManager.optimize(optimized));
-        ASSERT_BETWEEN(15, 25, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+        ASSERT_BETWEEN(13, 25, phaseManager.getMemo().getStats()._physPlanExplorationCount);
 
         // GroupBy+Union cannot propagate collation requirement, and we need a separate
         // CollationNode.
@@ -2355,7 +2392,7 @@ TEST(PhysRewriter, CompoundIndex1) {
 
     ABT optimized = rootNode;
     ASSERT_TRUE(phaseManager.optimize(optimized));
-    ASSERT_BETWEEN(60, 110, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+    ASSERT_BETWEEN(60, 130, phaseManager.getMemo().getStats()._physPlanExplorationCount);
 
     const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized);
     ASSERT_BSON_PATH("\"BinaryJoin\"", explainRoot, "child.nodeType");
@@ -2443,7 +2480,7 @@ TEST(PhysRewriter, CompoundIndex2) {
     ABT optimized = rootNode;
 
     ASSERT_TRUE(phaseManager.optimize(optimized));
-    ASSERT_BETWEEN(100, 170, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+    ASSERT_BETWEEN(100, 210, phaseManager.getMemo().getStats()._physPlanExplorationCount);
 
     const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized);
     ASSERT_BSON_PATH("\"BinaryJoin\"", explainRoot, "child.nodeType");
@@ -2527,7 +2564,7 @@ TEST(PhysRewriter, CompoundIndex3) {
 
     ABT optimized = rootNode;
     ASSERT_TRUE(phaseManager.optimize(optimized));
-    ASSERT_BETWEEN(70, 110, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+    ASSERT_BETWEEN(70, 130, phaseManager.getMemo().getStats()._physPlanExplorationCount);
 
     ASSERT_EXPLAIN_V2(
         "Root []\n"
@@ -2932,6 +2969,18 @@ TEST(PhysRewriter, IndexBoundsIntersect3) {
     using namespace properties;
     PrefixId prefixId;
 
+    PartialSchemaSelHints hints;
+    hints.emplace(
+        PartialSchemaKey{
+            "root",
+            make<PathGet>(
+                "a",
+                make<PathTraverse>(
+                    make<PathGet>(
+                        "b", make<PathTraverse>(make<PathIdentity>(), PathTraverse::kSingleLevel)),
+                    PathTraverse::kSingleLevel))},
+        kDefaultSelectivity);
+
     ABT scanNode = make<ScanNode>("root", "c1");
 
     ABT filterNode = make<FilterNode>(
@@ -2960,6 +3009,7 @@ TEST(PhysRewriter, IndexBoundsIntersect3) {
          OptPhaseManager::OptPhase::MemoExplorationPhase,
          OptPhaseManager::OptPhase::MemoImplementationPhase},
         prefixId,
+        false /*requireRID*/,
         {{{"c1",
            ScanDefinition{
                {},
@@ -2967,6 +3017,8 @@ TEST(PhysRewriter, IndexBoundsIntersect3) {
                  IndexDefinition{{{makeIndexPath(FieldPathType{"a", "b"}, true /*isMultiKey*/),
                                    CollationOp::Ascending}},
                                  true /*isMultiKey*/}}}}}}},
+        std::make_unique<HintedCE>(std::move(hints)),
+        std::make_unique<DefaultCosting>(),
         {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
 
     ABT optimized = rootNode;
@@ -3159,10 +3211,10 @@ TEST(PhysRewriter, IndexResidualReq) {
 
     // Make sure we can use the index to cover "b" while testing "b.c" with a separate filter.
     ASSERT_EXPLAIN_PROPS_V2(
-        "Properties [cost: 0.070002, localCost: 0, adjustedCE: 10]\n"
+        "Properties [cost: 0.402121, localCost: 0, adjustedCE: 189.571]\n"
         "|   |   Logical:\n"
         "|   |       cardinalityEstimate: \n"
-        "|   |           ce: 10\n"
+        "|   |           ce: 189.571\n"
         "|   |       projections: \n"
         "|   |           pa\n"
         "|   |           root\n"
@@ -3183,14 +3235,14 @@ TEST(PhysRewriter, IndexResidualReq) {
         "|   |       pa\n"
         "|   RefBlock: \n"
         "|       Variable [pa]\n"
-        "Properties [cost: 0.070002, localCost: 0.070002, adjustedCE: 10]\n"
+        "Properties [cost: 0.402121, localCost: 0.402121, adjustedCE: 189.571]\n"
         "|   |   Logical:\n"
         "|   |       cardinalityEstimate: \n"
-        "|   |           ce: 10\n"
+        "|   |           ce: 189.571\n"
         "|   |           requirementCEs: \n"
-        "|   |               refProjection: root, path: 'PathGet [a] PathIdentity []', ce: 100\n"
+        "|   |               refProjection: root, path: 'PathGet [a] PathIdentity []', ce: 330\n"
         "|   |               refProjection: root, path: 'PathGet [b] PathGet [c] PathIdentity []', "
-        "ce: 100\n"
+        "ce: 330\n"
         "|   |       projections: \n"
         "|   |           pa\n"
         "|   |           root\n"
@@ -3286,7 +3338,7 @@ TEST(PhysRewriter, IndexResidualReq1) {
 
     ABT optimized = rootNode;
     ASSERT_TRUE(phaseManager.optimize(optimized));
-    ASSERT_BETWEEN(25, 30, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+    ASSERT_BETWEEN(25, 45, phaseManager.getMemo().getStats()._physPlanExplorationCount);
 
     // Prefer index1 over index2 and index3 in order to cover all fields.
     ASSERT_EXPLAIN_V2(
@@ -3322,6 +3374,19 @@ TEST(PhysRewriter, IndexResidualReq2) {
 
     ABT scanNode = make<ScanNode>("root", "c1");
 
+    // TODO: SERVER-68006 will investigate why the plan changes without the hints.
+    PartialSchemaSelHints hints;
+    hints.emplace(PartialSchemaKey{"root",
+                                   make<PathGet>("a",
+                                                 make<PathTraverse>(make<PathIdentity>(),
+                                                                    PathTraverse::kSingleLevel))},
+                  kDefaultSelectivity);
+    hints.emplace(PartialSchemaKey{"root",
+                                   make<PathGet>("b",
+                                                 make<PathTraverse>(make<PathIdentity>(),
+                                                                    PathTraverse::kSingleLevel))},
+                  kDefaultSelectivity);
+
     ABT filterANode = make<FilterNode>(
         make<EvalFilter>(
             make<PathGet>("a",
@@ -3346,6 +3411,7 @@ TEST(PhysRewriter, IndexResidualReq2) {
          OptPhaseManager::OptPhase::MemoExplorationPhase,
          OptPhaseManager::OptPhase::MemoImplementationPhase},
         prefixId,
+        false /*requireRID*/,
         {{{"c1",
            ScanDefinition{{},
                           {{"index1",
@@ -3353,6 +3419,8 @@ TEST(PhysRewriter, IndexResidualReq2) {
                                 {{"a", CollationOp::Ascending, true /*isMultiKey*/},
                                  {"c", CollationOp::Ascending, true /*isMultiKey*/},
                                  {"b", CollationOp::Ascending, true /*isMultiKey*/}})}}}}}},
+        std::make_unique<HintedCE>(std::move(hints)),
+        std::make_unique<DefaultCosting>(),
         {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
 
     ABT optimized = rootNode;
@@ -3862,6 +3930,15 @@ TEST(PhysRewriter, IndexPartitioning) {
 
     ABT scanNode = make<ScanNode>("root", "c1");
 
+    // TODO: SERVER-68006 This test results in an failed assert when cardinality estimates change.
+    // The assert is in physical_rewriter.cpp:
+    // "Must optimize successfully if found compatible properties!"
+    PartialSchemaSelHints hints;
+    hints.emplace(PartialSchemaKey{"root", make<PathGet>("a", make<PathIdentity>())},
+                  kDefaultSelectivity);
+    hints.emplace(PartialSchemaKey{"root", make<PathGet>("b", make<PathIdentity>())},
+                  kDefaultSelectivity);
+
     ABT projectionANode = make<EvaluationNode>(
         "pa",
         make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>("root")),
@@ -3895,6 +3972,7 @@ TEST(PhysRewriter, IndexPartitioning) {
          OptPhaseManager::OptPhase::MemoExplorationPhase,
          OptPhaseManager::OptPhase::MemoImplementationPhase},
         prefixId,
+        false /*requireRID*/,
         {{{"c1",
            ScanDefinition{
                {},
@@ -3906,6 +3984,8 @@ TEST(PhysRewriter, IndexPartitioning) {
                      {}}}},
                {DistributionType::HashPartitioning, makeSeq(makeNonMultikeyIndexPath("b"))}}}},
          5 /*numberOfPartitions*/},
+        std::make_unique<HintedCE>(std::move(hints)),
+        std::make_unique<DefaultCosting>(),
         {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
 
     ABT optimized = rootNode;
@@ -3973,6 +4053,18 @@ TEST(PhysRewriter, IndexPartitioning1) {
 
     ABT scanNode = make<ScanNode>("root", "c1");
 
+    // TODO: SERVER-68006 This test results in an failed assert when cardinality estimates change.
+    // The assert is in physical_rewriter.cpp:
+    // "Must optimize successfully if found compatible properties!"
+    // The interesting thing is that it doesn't fail on each test run, but sometimes, which
+    // makes one think the failure depends on rounding errors of cost/cardinality estimates,
+    // which in turn results in different sub-plans.
+    PartialSchemaSelHints hints;
+    hints.emplace(PartialSchemaKey{"root", make<PathGet>("a", make<PathIdentity>())},
+                  kDefaultSelectivity);
+    hints.emplace(PartialSchemaKey{"root", make<PathGet>("b", make<PathIdentity>())},
+                  kDefaultSelectivity);
+
     ABT projectionANode = make<EvaluationNode>(
         "pa",
         make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>("root")),
@@ -4006,6 +4098,7 @@ TEST(PhysRewriter, IndexPartitioning1) {
          OptPhaseManager::OptPhase::MemoExplorationPhase,
          OptPhaseManager::OptPhase::MemoImplementationPhase},
         prefixId,
+        false /*requireRID*/,
         {{{"c1",
            ScanDefinition{
                {},
@@ -4023,6 +4116,8 @@ TEST(PhysRewriter, IndexPartitioning1) {
                      {}}}},
                {DistributionType::HashPartitioning, makeSeq(makeNonMultikeyIndexPath("c"))}}}},
          5 /*numberOfPartitions*/},
+        std::make_unique<HintedCE>(std::move(hints)),
+        std::make_unique<DefaultCosting>(),
         {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
 
     ABT optimized = rootNode;
diff --git a/src/mongo/db/query/ce/ce_math.cpp b/src/mongo/db/query/optimizer/utils/ce_math.cpp
index a5dd7bf04de..b56adb4d860 100644
--- a/src/mongo/db/query/ce/ce_math.cpp
+++ b/src/mongo/db/query/optimizer/utils/ce_math.cpp
@@ -27,19 +27,31 @@
  *    it in the license file.
  */
 
-#include <algorithm>
-#include <cmath>
-#include <functional>
+#include <algorithm>   // std::sort
+#include <cmath>       // std::pow
+#include <functional>  // std::greater
 
-#include "mongo/db/query/ce/ce_math.h"
+#include "mongo/db/query/optimizer/utils/ce_math.h"
 
 namespace mongo::ce {
-double conjExponentialBackoff(std::vector<double> conjSelectivities) {
-    std::sort(conjSelectivities.begin(), conjSelectivities.end());
-    double sel = conjSelectivities[0];
-    double f = 1.0;
+
+bool validSelectivity(SelectivityType sel) {
+    return (sel >= 0.0 && sel <= 1.0);
+}
+
+bool validCardinality(CEType card) {
+    return (card >= kMinCard && card <= std::numeric_limits<CEType>::max());
+}
+
+SelectivityType conjExponentialBackoff(std::vector<SelectivityType> conjSelectivities) {
+    size_t actualMaxBackoffElements = std::min(conjSelectivities.size(), kMaxBackoffElements);
+    std::partial_sort(conjSelectivities.begin(),
+                      conjSelectivities.begin() + actualMaxBackoffElements,
+                      conjSelectivities.end());
+    SelectivityType sel = conjSelectivities[0];
+    SelectivityType f = 1.0;
     size_t i = 1;
-    while (i < conjSelectivities.size() && i <= kMaxBackoffIterations) {
+    while (i < actualMaxBackoffElements) {
         f /= 2.0;
         sel *= std::pow(conjSelectivities[i], f);
         i++;
@@ -47,12 +59,16 @@ double conjExponentialBackoff(std::vector<double> conjSelectivities) {
     return sel;
 }
 
-double disjExponentialBackoff(std::vector<double> disjSelectivities) {
-    std::sort(disjSelectivities.begin(), disjSelectivities.end(), std::greater<double>());
-    double sel = 1.0 - disjSelectivities[0];
-    double f = 1.0;
+SelectivityType disjExponentialBackoff(std::vector<SelectivityType> disjSelectivities) {
+    size_t actualMaxBackoffElements = std::min(disjSelectivities.size(), kMaxBackoffElements);
+    std::partial_sort(disjSelectivities.begin(),
+                      disjSelectivities.begin() + actualMaxBackoffElements,
+                      disjSelectivities.end(),
+                      std::greater<SelectivityType>());
+    SelectivityType sel = 1.0 - disjSelectivities[0];
+    SelectivityType f = 1.0;
     size_t i = 1;
-    while (i < disjSelectivities.size() && i <= kMaxBackoffIterations) {
+    while (i < actualMaxBackoffElements) {
         f /= 2.0;
         sel *= std::pow(1 - disjSelectivities[i], f);
         i++;
diff --git a/src/mongo/db/query/ce/ce_math.h b/src/mongo/db/query/optimizer/utils/ce_math.h
index 3ef529a9224..a29a7d1f34b 100644
--- a/src/mongo/db/query/ce/ce_math.h
+++ b/src/mongo/db/query/optimizer/utils/ce_math.h
@@ -29,24 +29,43 @@
 
 #pragma once
 
+#include <limits>
 #include <vector>
 
+#include "mongo/db/query/optimizer/defs.h"
+
 namespace mongo::ce {
 
+using namespace mongo::optimizer;
+
+// Default cardinality when actual collection cardinality is unknown.
+// Mostly used by unit tests.
+constexpr CEType kDefaultCard = 1000.00;
+
+// Minimum estimated cardinality. In the absense of any statistics we can never
+// assume there are less than this many matching documents.
+constexpr CEType kMinCard = 0.01;
+
 /**
- * Specifies the maximum number of iterations to use when estimating via exponential backoff.
+ * Specifies the maximum number of elements (selectivities) to use when estimating via
+ * exponential backoff.
  */
-const size_t kMaxBackoffIterations = 3;
+const size_t kMaxBackoffElements = 4;
+
+
+bool validSelectivity(SelectivityType sel);
+
+bool validCardinality(CEType card);
 
 /**
  * Estimates the selectivity of a conjunction given the selectivities of its subexpressions using
  * exponential backoff.
  */
-double conjExponentialBackoff(std::vector<double> conjSelectivities);
+SelectivityType conjExponentialBackoff(std::vector<SelectivityType> conjSelectivities);
 
 /**
  * Estimates the selectivity of a disjunction given the selectivities of its subexpressions using
  * exponential backoff.
  */
-double disjExponentialBackoff(std::vector<double> disjSelectivities);
+SelectivityType disjExponentialBackoff(std::vector<SelectivityType> disjSelectivities);
 }  // namespace mongo::ce
author	Timour Katchaounov <timour.katchaounov@mongodb.com>	2022-07-19 09:57:59 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2022-07-19 10:30:38 +0000
commit	4af213f08a582a90534f5f98a9cb1d0976e4945b (patch)
tree	dc6b991beacaa4c91dfe5a1d7d9874a548bf946a
parent	0fa5c9a8075ef9d27d3d94b68876a6fbd5afd54f (diff)
download	mongo-4af213f08a582a90534f5f98a9cb1d0976e4945b.tar.gz