summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTimour Katchaounov <timour.katchaounov@mongodb.com>2022-07-19 09:57:59 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-07-19 10:30:38 +0000
commit4af213f08a582a90534f5f98a9cb1d0976e4945b (patch)
treedc6b991beacaa4c91dfe5a1d7d9874a548bf946a
parent0fa5c9a8075ef9d27d3d94b68876a6fbd5afd54f (diff)
downloadmongo-4af213f08a582a90534f5f98a9cb1d0976e4945b.tar.gz
SERVER-67166 Heuristic CE for SargableNode
-rw-r--r--src/mongo/db/commands/cqf/cqf_aggregate.cpp2
-rw-r--r--src/mongo/db/query/ce/SConscript11
-rw-r--r--src/mongo/db/query/ce/ce_heuristic_test.cpp218
-rw-r--r--src/mongo/db/query/ce/ce_histogram.cpp18
-rw-r--r--src/mongo/db/query/ce/ce_histogram.h2
-rw-r--r--src/mongo/db/query/ce/ce_histogram_test.cpp12
-rw-r--r--src/mongo/db/query/ce/ce_test_utils.cpp48
-rw-r--r--src/mongo/db/query/ce/ce_test_utils.h37
-rw-r--r--src/mongo/db/query/optimizer/SConscript1
-rw-r--r--src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp136
-rw-r--r--src/mongo/db/query/optimizer/interval_intersection_test.cpp4
-rw-r--r--src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp8
-rw-r--r--src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp125
-rw-r--r--src/mongo/db/query/optimizer/utils/ce_math.cpp (renamed from src/mongo/db/query/ce/ce_math.cpp)44
-rw-r--r--src/mongo/db/query/optimizer/utils/ce_math.h (renamed from src/mongo/db/query/ce/ce_math.h)27
15 files changed, 604 insertions, 89 deletions
diff --git a/src/mongo/db/commands/cqf/cqf_aggregate.cpp b/src/mongo/db/commands/cqf/cqf_aggregate.cpp
index c51302d28a8..304ea553f69 100644
--- a/src/mongo/db/commands/cqf/cqf_aggregate.cpp
+++ b/src/mongo/db/commands/cqf/cqf_aggregate.cpp
@@ -493,7 +493,7 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> getSBEExecutorViaCascadesOp
} else if (internalQueryCardinalityEstimatorMode == ce::kHistogram &&
ce::CollectionStatistics::hasCollectionStatistics(nss)) {
const auto& stats = ce::CollectionStatistics::getCollectionStatistics(nss);
- auto ceDerivation = std::make_unique<CEHistogramTransport>(opCtx, stats);
+ auto ceDerivation = std::make_unique<CEHistogramTransport>(stats);
OptPhaseManager phaseManager{OptPhaseManager::getAllRewritesSet(),
prefixId,
false /*requireRID*/,
diff --git a/src/mongo/db/query/ce/SConscript b/src/mongo/db/query/ce/SConscript
index 4941d74512d..46c8a254e75 100644
--- a/src/mongo/db/query/ce/SConscript
+++ b/src/mongo/db/query/ce/SConscript
@@ -9,7 +9,6 @@ env.Library(
source=[
'array_histogram.cpp',
'ce_histogram.cpp',
- 'ce_math.cpp',
'ce_sampling.cpp',
'ce_estimation.cpp',
'collection_statistics.cpp',
@@ -46,3 +45,13 @@ env.CppUnitTest(
'ce_test_utils',
],
)
+
+env.CppUnitTest(
+ target="ce_heuristic_test",
+ source=[
+ "ce_heuristic_test.cpp",
+ ],
+ LIBDEPS=[
+ 'ce_test_utils',
+ ],
+)
diff --git a/src/mongo/db/query/ce/ce_heuristic_test.cpp b/src/mongo/db/query/ce/ce_heuristic_test.cpp
new file mode 100644
index 00000000000..f8a98c1001e
--- /dev/null
+++ b/src/mongo/db/query/ce/ce_heuristic_test.cpp
@@ -0,0 +1,218 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include <string>
+
+#include "mongo/db/query/ce/ce_test_utils.h"
+#include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
+#include "mongo/db/query/optimizer/cascades/cost_derivation.h"
+#include "mongo/db/query/optimizer/cascades/logical_props_derivation.h"
+#include "mongo/db/query/optimizer/cascades/memo.h"
+#include "mongo/db/query/optimizer/defs.h"
+#include "mongo/db/query/optimizer/explain.h"
+#include "mongo/db/query/optimizer/metadata.h"
+#include "mongo/db/query/optimizer/opt_phase_manager.h"
+#include "mongo/db/query/optimizer/utils/unit_test_utils.h"
+#include "mongo/db/query/optimizer/utils/utils.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo::ce {
+namespace {
+
+using namespace optimizer;
+using namespace optimizer::cascades;
+
+constexpr double kCollCard = 10000.0;
+const std::string collName = "test";
+
+class HeuristicCETester : public CETester {
+public:
+ HeuristicCETester(std::string collName) : CETester(collName, 0) {}
+
+protected:
+ std::unique_ptr<CEInterface> getCETransport() const override {
+ return std::make_unique<HeuristicCE>();
+ }
+};
+
+HeuristicCETester ht(collName);
+
+TEST(CEHeuristicTest, CEwithoutOptimization) {
+ std::string query = "{a0 : {$gt : 14, $lt : 21}}";
+ ht.setCollCard(kCollCard);
+ double ce = ht.getCE(query, 0);
+ ASSERT_APPROX_EQUAL(100.0, ce, kMaxCEError);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Eq) {
+ std::string query = "{a : 123}";
+ ASSERT_MATCH_CE_CARD(ht, query, 0.0, 0.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 1.73205, 3.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 2.64575, 7.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 10.0, 100.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 100.0, 10000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Gt) {
+ std::string query = "{a: {$gt: 44}}";
+ ASSERT_MATCH_CE_CARD(ht, query, 0.01, 0.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 0.7, 1.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 6.3, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 44.55, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 330, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_Gt_Lt) {
+ std::string query = "{a: {$gt: 44, $lt: 99}}";
+ ASSERT_MATCH_CE_CARD(ht, query, 0.585662, 1.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 5.27096, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 29.885, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 189.571, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_AND2Eq) {
+ std::string query = "{a : 13, b : 42}";
+ ASSERT_MATCH_CE_CARD(ht, query, 1.31607, 3.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 1.62658, 7.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 1.77828, 10.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 100.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 10.0, 10000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_AND3Eq) {
+ std::string query = "{a : 13, b : 42, c : 69}";
+ ASSERT_MATCH_CE_CARD(ht, query, 1.1472, 3.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 1.27537, 7.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 1.33352, 10.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 1.77828, 100.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 3.16228, 10000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_OR1path) {
+ std::string query = "{$or: [{a0: {$gt: 44}}, {a0: {$lt: 9}}]}";
+ ASSERT_MATCH_CE_CARD(ht, query, 7.52115, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 58.6188, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 451.581, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_OR2paths) {
+ std::string query = "{$or: [{a0: {$gt:44}}, {b0: {$lt: 9}}]}";
+ // TODO: Disjunctions on different paths are not SARGable, and are represented as a single
+ // FilterNode. FilterNode is currenlty estimated to be always 10% selectivity.
+ ASSERT_MATCH_CE_CARD(ht, query, 0.9, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 9.9, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 100.0, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathSimple) {
+ std::string query =
+ "{$or: ["
+ "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]},"
+ "{$and: [{a0: {$gt:40}}, {a0: {$lt: 44}}]}"
+ "]}";
+ ASSERT_MATCH_CE_CARD(ht, query, 6.59965, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 41.2515, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 270.42, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF1pathComplex) {
+ // Each disjunct has different number of conjuncts, so that its selectivity is different.
+ // We need 5 disjuncts to test exponential backoff which cuts off at the first 4.
+ // The conjuncts are in selectivity order.
+ std::string query1 =
+ "{$or: ["
+ "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]},"
+ "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}, {a0: {$gt: 42}}]},"
+ "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}]},"
+ "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: "
+ "81}}]},"
+ "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: "
+ "81}}, {a0: {$lt: 77}}]}"
+ "]}";
+ auto ce1 = ht.getCE(query1);
+ // The conjuncts are in inverse selectivity order.
+ std::string query2 =
+ "{$or: ["
+ "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: "
+ "81}}, {a0: {$lt: 77}}]},"
+ "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}, {a0: {$lt: "
+ "81}}]},"
+ "{$and: [{a0: {$gt:40}}, {a0: {$lt: 99}}, {a0: {$gt: 42}}, {a0: {$lt: 88}}]},"
+ "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}, {a0: {$gt: 42}}]},"
+ "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]}"
+ "]}";
+ auto ce2 = ht.getCE(query2);
+ ASSERT_APPROX_EQUAL(ce1, ce2, kMaxCEError);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_DNF2paths) {
+ // TODO: Disjunctions on different paths are not SARGable, and are represented as a single
+ // FilterNode. FilterNode is currently estimated to be always 10% selectivity.
+ std::string query =
+ "{$or: ["
+ "{$and: [{a0: {$gt: 9}}, {a0: {$lt: 12}}]},"
+ "{$and: [{b0: {$gt:40}}, {b0: {$lt: 44}}]}"
+ "]}";
+ ASSERT_MATCH_CE_CARD(ht, query, 0.9, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 9.9, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 100.0, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_CNF1path) {
+ std::string query =
+ "{$and : ["
+ "{$or : [ {a0 : {$gt : 11}}, {a0 : {$lt : 44}} ]},"
+ "{$or : [ {a0 : {$gt : 77}}, {a0 : {$eq : 51}} ]}"
+ "]}";
+ ASSERT_MATCH_CE_CARD(ht, query, 6.87663, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 42.7435, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 275.419, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionPhase_CNF2paths) {
+ std::string query =
+ "{$and : ["
+ "{$or : [ {a0 : {$gt : 11}}, {a0 : {$lt : 44}} ]},"
+ "{$or : [ {b0 : {$gt : 77}}, {b0 : {$eq : 51}} ]}"
+ "]}";
+ ASSERT_MATCH_CE_CARD(ht, query, 6.21212, 9.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 36.4418, 99.0);
+ ASSERT_MATCH_CE_CARD(ht, query, 228.935, 1000.0);
+}
+
+TEST(CEHeuristicTest, CEAfterMemoSubstitutionExplorationPhases) {
+ std::string query = "{a : 13, b : 42}";
+ ht.setCollCard(kCollCard);
+ double ce = ht.getCE(query, 2);
+ ASSERT_APPROX_EQUAL(10.0, ce, kMaxCEError);
+}
+
+} // namespace
+} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_histogram.cpp b/src/mongo/db/query/ce/ce_histogram.cpp
index 1a04b740333..7ce299b4707 100644
--- a/src/mongo/db/query/ce/ce_histogram.cpp
+++ b/src/mongo/db/query/ce/ce_histogram.cpp
@@ -31,11 +31,11 @@
#include "mongo/db/query/ce/ce_estimation.h"
#include "mongo/db/query/ce/ce_histogram.h"
-#include "mongo/db/query/ce/ce_math.h"
#include "mongo/db/query/ce/collection_statistics.h"
#include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
#include "mongo/db/query/optimizer/utils/abt_hash.h"
+#include "mongo/db/query/optimizer/utils/ce_math.h"
#include "mongo/db/query/optimizer/utils/memo_utils.h"
namespace mongo::optimizer::cascades {
@@ -44,8 +44,8 @@ using namespace properties;
class CEHistogramTransportImpl {
public:
- CEHistogramTransportImpl(OperationContext* opCtx, const ce::CollectionStatistics& stats)
- : _opCtx(opCtx), _heuristicCE(), _stats(stats) {}
+ CEHistogramTransportImpl(const ce::CollectionStatistics& stats)
+ : _heuristicCE(), _stats(stats) {}
~CEHistogramTransportImpl() {}
@@ -101,16 +101,16 @@ public:
conjSelectivities.push_back(cardinality / _stats.getCardinality());
}
- auto backoff = ce::conjExponentialBackoff(conjSelectivities);
+ auto backoff = ce::conjExponentialBackoff(std::move(conjSelectivities));
disjSelectivities.push_back(backoff);
}
- auto backoff = ce::disjExponentialBackoff(disjSelectivities);
+ auto backoff = ce::disjExponentialBackoff(std::move(disjSelectivities));
topLevelSelectivities.push_back(backoff);
}
// The elements of the PartialSchemaRequirements map represent an implicit conjunction.
- auto backoff = ce::conjExponentialBackoff(topLevelSelectivities);
+ auto backoff = ce::conjExponentialBackoff(std::move(topLevelSelectivities));
return backoff * childResult;
}
@@ -140,14 +140,12 @@ public:
}
private:
- OperationContext* _opCtx;
HeuristicCE _heuristicCE;
const ce::CollectionStatistics& _stats;
};
-CEHistogramTransport::CEHistogramTransport(OperationContext* opCtx,
- const ce::CollectionStatistics& stats)
- : _impl(std::make_unique<CEHistogramTransportImpl>(opCtx, stats)) {}
+CEHistogramTransport::CEHistogramTransport(const ce::CollectionStatistics& stats)
+ : _impl(std::make_unique<CEHistogramTransportImpl>(stats)) {}
CEHistogramTransport::~CEHistogramTransport() {}
diff --git a/src/mongo/db/query/ce/ce_histogram.h b/src/mongo/db/query/ce/ce_histogram.h
index 6132cc19339..dfc556ebf87 100644
--- a/src/mongo/db/query/ce/ce_histogram.h
+++ b/src/mongo/db/query/ce/ce_histogram.h
@@ -38,7 +38,7 @@ class CEHistogramTransportImpl;
class CEHistogramTransport : public CEInterface {
public:
- CEHistogramTransport(OperationContext* opCtx, const ce::CollectionStatistics& stats);
+ CEHistogramTransport(const ce::CollectionStatistics& stats);
~CEHistogramTransport();
CEType deriveCE(const Memo& memo,
diff --git a/src/mongo/db/query/ce/ce_histogram_test.cpp b/src/mongo/db/query/ce/ce_histogram_test.cpp
index 76a32ef5fa0..3be39685e83 100644
--- a/src/mongo/db/query/ce/ce_histogram_test.cpp
+++ b/src/mongo/db/query/ce/ce_histogram_test.cpp
@@ -27,6 +27,8 @@
* it in the license file.
*/
+#include <iostream>
+
#include "mongo/db/query/ce/ce_estimation.h"
#include "mongo/db/query/ce/ce_histogram.h"
#include "mongo/db/query/ce/ce_test_utils.h"
@@ -45,8 +47,8 @@ public:
: CETester(collName, numRecords), _stats{stats} {}
protected:
- std::unique_ptr<CEInterface> getCETransport(OperationContext* opCtx) override {
- return std::make_unique<CEHistogramTransport>(opCtx, _stats);
+ std::unique_ptr<CEInterface> getCETransport() const override {
+ return std::make_unique<CEHistogramTransport>(_stats);
}
private:
@@ -118,7 +120,7 @@ TEST(CEHistogramTest, AssertSmallMaxDiffHistogramEstimatesAtomicPredicates) {
ASSERT_MATCH_CE(t, "{a: {$eq: \"foo\"}}", 0.0);
// Test case when field doesn't match fieldpath of histogram. This falls back to heuristics.
- ASSERT_MATCH_CE(t, "{b: {$eq: 1}}", 0.8);
+ ASSERT_MATCH_CE(t, "{b: {$eq: 1}}", 2.82843);
// Test $gt.
ASSERT_MATCH_CE(t, "{a: {$gt: 3}}", 0.0);
@@ -201,8 +203,8 @@ TEST(CEHistogramTest, AssertSmallHistogramEstimatesComplexPredicates) {
// Test conjunctions over multiple fields for which we may not have histograms. This falls back
// to heuristic estimation.
- ASSERT_MATCH_CE(t, "{a: {$eq: 2}, c: {$eq: 1}}", 0.09);
- ASSERT_MATCH_CE(t, "{c: {$eq: 2}, d: {$eq: 22}}", 0.09);
+ ASSERT_MATCH_CE(t, "{a: {$eq: 2}, c: {$eq: 1}}", 1.73205);
+ ASSERT_MATCH_CE(t, "{c: {$eq: 2}, d: {$eq: 22}}", 1.73205);
}
TEST(CEHistogramTest, SanityTestEmptyHistogram) {
diff --git a/src/mongo/db/query/ce/ce_test_utils.cpp b/src/mongo/db/query/ce/ce_test_utils.cpp
index eb7824eb483..b35e36928dc 100644
--- a/src/mongo/db/query/ce/ce_test_utils.cpp
+++ b/src/mongo/db/query/ce/ce_test_utils.cpp
@@ -27,9 +27,12 @@
* it in the license file.
*/
+#include <cstddef>
+
#include "mongo/db/query/ce/ce_test_utils.h"
#include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
+#include "mongo/db/query/optimizer/cascades/cost_derivation.h"
#include "mongo/db/query/optimizer/cascades/logical_props_derivation.h"
#include "mongo/db/query/optimizer/explain.h"
#include "mongo/db/query/optimizer/opt_phase_manager.h"
@@ -41,27 +44,31 @@ namespace mongo::ce {
using namespace optimizer;
using namespace cascades;
-CETester::CETester(std::string collName, double numRecords)
- : _collName(std::move(collName)), _numRecords(std::move(numRecords)) {}
-
-double CETester::getCE(const std::string& query) {
- // Mock opCtx for test.
- QueryTestServiceContext serviceContext;
- auto opCtx = serviceContext.makeOperationContext();
+CETester::CETester(std::string collName, double collCard)
+ : _collName(std::move(collName)), _collCard(collCard) {}
+double CETester::getCE(const std::string& query, size_t optimizationLevel) const {
// Mock memo.
- ScanDefinition sd({}, {}, {DistributionType::Centralized}, true, _numRecords);
+ ScanDefinition sd({}, {}, {DistributionType::Centralized}, true, _collCard);
Metadata metadata({{_collName, sd}});
- Memo memo(DebugInfo::kDefaultForTests,
- metadata,
- std::make_unique<DefaultLogicalPropsDerivation>(),
- std::make_unique<HeuristicCE>());
- // Construct placeholder PhaseManager.
+ std::vector<OptPhaseManager::OptPhase> optPhaseChoices{
+ OptPhaseManager::OptPhase::MemoSubstitutionPhase,
+ OptPhaseManager::OptPhase::MemoExplorationPhase};
+ optimizationLevel = std::min(optimizationLevel, optPhaseChoices.size());
+ OptPhaseManager::PhaseSet optPhases;
+ for (size_t i = 0; i < optimizationLevel; ++i) {
+ optPhases.insert(optPhaseChoices[i]);
+ }
+
+ // Construct placeholder PhaseManager. Notice that it also creates a Memo internally.
PrefixId prefixId;
- OptPhaseManager phaseManager({OptPhaseManager::OptPhase::MemoSubstitutionPhase},
+ OptPhaseManager phaseManager(optPhases,
prefixId,
- {{{_collName, {{}, {}}}}},
+ true /*requireRID*/,
+ metadata,
+ std::make_unique<HeuristicCE>(),
+ std::make_unique<DefaultCosting>(),
DebugInfo::kDefaultForTests);
// Construct ABT from pipeline and optimize.
@@ -69,8 +76,15 @@ double CETester::getCE(const std::string& query) {
ASSERT_TRUE(phaseManager.optimize(abt));
// Get cardinality estimate.
- auto cht = getCETransport(opCtx.get());
- return cht->deriveCE(memo, {}, abt.ref());
+ auto cht = getCETransport();
+ auto ce = cht->deriveCE(phaseManager.getMemo(), {}, abt.ref());
+
+#ifdef CE_TEST_LOG_MODE
+ std::cout << "Query: " << query << ", card: " << _collCard << ", Estimated: " << ce
+ << std::endl;
+#endif
+
+ return ce;
}
} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_test_utils.h b/src/mongo/db/query/ce/ce_test_utils.h
index e6fc660e27b..4d21e834a4e 100644
--- a/src/mongo/db/query/ce/ce_test_utils.h
+++ b/src/mongo/db/query/ce/ce_test_utils.h
@@ -29,6 +29,9 @@
#pragma once
+#include <cstddef>
+#include <sys/types.h>
+
#include "mongo/db/query/optimizer/cascades/interfaces.h"
namespace mongo {
@@ -44,14 +47,35 @@ class CEInterface;
namespace ce {
+// Enable this flag to log all estimates, and let all tests pass.
+//#define CE_TEST_LOG_MODE 1
+
const double kMaxCEError = 0.01;
/**
- * A helpful macro for asserting that the CE of a $match predicate is approximately what we were
+ * Helpful macros for asserting that the CE of a $match predicate is approximately what we were
* expecting.
*/
+
+#ifndef CE_TEST_LOG_MODE
#define ASSERT_MATCH_CE(ce, predicate, expectedCE) \
ASSERT_APPROX_EQUAL(expectedCE, ce.getCE(predicate), kMaxCEError)
+#else
+#define ASSERT_MATCH_CE(ce, predicate, expectedCE) \
+ ce.getCE(predicate); \
+ ASSERT_APPROX_EQUAL(1.0, 1.0, kMaxCEError)
+#endif
+
+#ifndef CE_TEST_LOG_MODE
+#define ASSERT_MATCH_CE_CARD(ce, predicate, expectedCE, collCard) \
+ ce.setCollCard(collCard); \
+ ASSERT_APPROX_EQUAL(expectedCE, ce.getCE(predicate), kMaxCEError)
+#else
+#define ASSERT_MATCH_CE_CARD(ce, predicate, expectedCE, collCard) \
+ ce.setCollCard(collCard); \
+ ce.getCE(predicate); \
+ ASSERT_APPROX_EQUAL(1.0, 1.0, kMaxCEError)
+#endif
/**
* A test utility class for helping verify the cardinality of CE transports on a given $match
@@ -64,19 +88,22 @@ public:
/**
* Returns the estimated cardinality of a given 'matchPredicate'.
*/
- double getCE(const std::string& matchPredicate);
+ double getCE(const std::string& matchPredicate, size_t optimizationLevel = 1) const;
+
+ void setCollCard(double card) {
+ _collCard = card;
+ }
protected:
/**
* Subclasses need to override this method to initialize the transports they are testing.
*/
- virtual std::unique_ptr<optimizer::cascades::CEInterface> getCETransport(
- OperationContext* opCtx) = 0;
+ virtual std::unique_ptr<optimizer::cascades::CEInterface> getCETransport() const = 0;
std::string _collName;
// The number of records in the collection we are testing.
- double _numRecords;
+ double _collCard;
};
} // namespace ce
diff --git a/src/mongo/db/query/optimizer/SConscript b/src/mongo/db/query/optimizer/SConscript
index 02b3b90bcd0..c050a0d79e7 100644
--- a/src/mongo/db/query/optimizer/SConscript
+++ b/src/mongo/db/query/optimizer/SConscript
@@ -39,6 +39,7 @@ env.Library(
"rewrites/path_lower.cpp",
"syntax/expr.cpp",
"utils/abt_hash.cpp",
+ 'utils/ce_math.cpp',
"utils/interval_utils.cpp",
"utils/memo_utils.cpp",
"utils/utils.cpp",
diff --git a/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp b/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp
index 6d89cd1a436..802c1d24892 100644
--- a/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp
+++ b/src/mongo/db/query/optimizer/cascades/ce_heuristic.cpp
@@ -28,18 +28,26 @@
*/
#include "mongo/db/query/optimizer/cascades/ce_heuristic.h"
-#include "mongo/db/query/optimizer/utils/memo_utils.h"
+
+#include "mongo/db/query/optimizer/cascades/memo.h"
+#include "mongo/db/query/optimizer/utils/ce_math.h"
+#include "mongo/util/assert_util.h"
namespace mongo::optimizer::cascades {
using namespace properties;
+using namespace mongo::ce;
+
+// Invalid estimate - an arbitrary negative value used for initialization.
+constexpr SelectivityType kInvalidSel = -1.0;
+constexpr SelectivityType kDefaultFilterSel = 0.1;
class CEHeuristicTransport {
public:
CEType transport(const ScanNode& node, CEType /*bindResult*/) {
// Default cardinality estimate.
const CEType metadataCE = _memo.getMetadata()._scanDefs.at(node.getScanDefName()).getCE();
- return (metadataCE < 0.0) ? 1000.00 : metadataCE;
+ return (metadataCE < 0.0) ? kDefaultCard : metadataCE;
}
CEType transport(const ValueScanNode& node, CEType /*bindResult*/) {
@@ -53,6 +61,9 @@ public:
}
CEType transport(const FilterNode& node, CEType childResult, CEType /*exprResult*/) {
+ if (childResult == 0.0) {
+ return 0.0;
+ }
if (node.getFilter() == Constant::boolean(true)) {
// Trivially true filter.
return childResult;
@@ -61,7 +72,7 @@ public:
return 0.0;
} else {
// Estimate filter selectivity at 0.1.
- return 0.1 * childResult;
+ return std::max(kDefaultFilterSel * childResult, kMinCard);
}
}
@@ -70,16 +81,120 @@ public:
return childResult;
}
+ /**
+ * Default selectivity of equalities. To avoid super small selectivities for small
+ * cardinalities, that would result in 0 cardinality for many small inputs, the
+ * estimate is scaled as inputCard grows. The bigger inputCard, the smaller the
+ * selectivity.
+ */
+ SelectivityType equalitySel(const CEType inputCard) {
+ uassert(6716604, "Zero cardinality must be handled by the caller.", inputCard > 0.0);
+ return std::sqrt(inputCard) / inputCard;
+ }
+
+ /**
+ * Default selectivity of intervals with bounds on both ends. These intervals are
+ * considered less selective than equalities.
+ * Examples: (a > 'abc' AND a < 'hta'), (0 < b <= 13)
+ */
+ SelectivityType closedRangeSel(const CEType inputCard) {
+ CEType sel = kInvalidSel;
+ if (inputCard < 20.0) {
+ sel = 0.50;
+ } else if (inputCard < 100.0) {
+ sel = 0.33;
+ } else {
+ sel = 0.20;
+ }
+ return sel;
+ }
+
+ /**
+ * Default selectivity of intervals open on one end. These intervals are
+ * considered less selective than those with both ends specified by the user query.
+ * Examples: (a > 'xyz'), (b <= 13)
+ */
+ SelectivityType openRangeSel(const CEType inputCard) {
+ SelectivityType sel = kInvalidSel;
+ if (inputCard < 20.0) {
+ sel = 0.70;
+ } else if (inputCard < 100.0) {
+ sel = 0.45;
+ } else {
+ sel = 0.33;
+ }
+ return sel;
+ }
+
+ mongo::sbe::value::TypeTags boundType(const BoundRequirement& bound) {
+ const auto constBoundPtr = bound.getBound().cast<Constant>();
+ if (constBoundPtr == nullptr) {
+ return mongo::sbe::value::TypeTags::Nothing;
+ }
+ const auto [tag, val] = constBoundPtr->get();
+ return tag;
+ }
+
+ SelectivityType intervalSel(const IntervalRequirement& interval, const CEType inputCard) {
+ SelectivityType sel = kInvalidSel;
+ if (interval.isFullyOpen()) {
+ sel = 1.0;
+ } else if (interval.isEquality()) {
+ sel = equalitySel(inputCard);
+ } else if (interval.getHighBound().isPlusInf() || interval.getLowBound().isMinusInf() ||
+ boundType(interval.getLowBound()) != boundType(interval.getHighBound())) {
+ // The interval has an actual bound only on one of it ends if:
+ // - one of the bounds is infinite, or
+ // - both bounds are of a different type - this is the case when due to type bracketing
+ // one of the bounds is the lowest/highest value of the previous/next type.
+ // TODO: Notice that sometimes type bracketing uses a min/max value from the same type,
+ // so sometimes we may not detect an open-ended interval.
+ sel = openRangeSel(inputCard);
+ } else {
+ sel = closedRangeSel(inputCard);
+ }
+ uassert(6716603, "Invalid selectivity.", validSelectivity(sel));
+ return sel;
+ }
+
CEType transport(const SargableNode& node,
- CEType /*childResult*/,
+ CEType childResult,
CEType /*bindsResult*/,
CEType /*refsResult*/) {
- ABT lowered = node.getChild();
+ // Early out and return 0 since we don't expect to get more results.
+ if (childResult == 0.0) {
+ return 0.0;
+ }
+
+ SelectivityType topLevelSel = 1.0;
+ std::vector<SelectivityType> topLevelSelectivities;
for (const auto& [key, req] : node.getReqMap()) {
- // TODO: consider issuing one filter node per interval.
- lowerPartialSchemaRequirement(key, req, lowered);
+ SelectivityType disjSel = 1.0;
+ std::vector<SelectivityType> disjSelectivities;
+ // Intervals are in DNF.
+ const auto intervalDNF = req.getIntervals();
+ const auto disjuncts = intervalDNF.cast<IntervalReqExpr::Disjunction>()->nodes();
+ for (const auto& disjunct : disjuncts) {
+ const auto& conjuncts = disjunct.cast<IntervalReqExpr::Conjunction>()->nodes();
+ SelectivityType conjSel = 1.0;
+ std::vector<SelectivityType> conjSelectivities;
+ for (const auto& conjunct : conjuncts) {
+ const auto& interval = conjunct.cast<IntervalReqExpr::Atom>()->getExpr();
+ const SelectivityType sel = intervalSel(interval, childResult);
+ conjSelectivities.push_back(sel);
+ }
+ conjSel = ce::conjExponentialBackoff(std::move(conjSelectivities));
+ disjSelectivities.push_back(conjSel);
+ }
+ disjSel = ce::disjExponentialBackoff(std::move(disjSelectivities));
+ topLevelSelectivities.push_back(disjSel);
}
- return algebra::transport<false>(lowered, *this);
+
+ // The elements of the PartialSchemaRequirements map represent an implicit conjunction.
+ topLevelSel = ce::conjExponentialBackoff(std::move(topLevelSelectivities));
+ CEType card = std::max(topLevelSel * childResult, kMinCard);
+ uassert(6716602, "Invalid cardinality.", mongo::ce::validCardinality(card));
+ return card;
}
CEType transport(const RIDIntersectNode& node,
@@ -96,7 +211,7 @@ public:
CEType /*exprResult*/) {
const auto& filter = node.getFilter();
- double selectivity = 0.1;
+ SelectivityType selectivity = kDefaultFilterSel;
if (filter == Constant::boolean(false)) {
selectivity = 0.0;
} else if (filter == Constant::boolean(true)) {
@@ -194,7 +309,8 @@ private:
CEType HeuristicCE::deriveCE(const Memo& memo,
const LogicalProps& /*logicalProps*/,
const ABT::reference_type logicalNodeRef) const {
- return CEHeuristicTransport::derive(memo, logicalNodeRef);
+ CEType card = CEHeuristicTransport::derive(memo, logicalNodeRef);
+ return card;
}
} // namespace mongo::optimizer::cascades
diff --git a/src/mongo/db/query/optimizer/interval_intersection_test.cpp b/src/mongo/db/query/optimizer/interval_intersection_test.cpp
index a965e2600e2..33c2d5c4b5b 100644
--- a/src/mongo/db/query/optimizer/interval_intersection_test.cpp
+++ b/src/mongo/db/query/optimizer/interval_intersection_test.cpp
@@ -114,8 +114,8 @@ TEST(IntervalIntersection, SingleFieldIntersection) {
" BindBlock:\n"
" [rid_0]\n"
" Source []\n"},
- {"{$or: [{$and: [{a0: {$gt:9}}, {a0: {$lt: 12}}]}, {$and: [{a0: {$gt:40}}, {a0: {$lt: "
- "44}}]}]}",
+ {"{$or: [{$and: [{a0: {$gt:9, $lt:999}}, {a0: {$gt: 0, $lt: 12}}]},"
+ " {$and: [{a0: {$gt:40, $lt:997}}, {a0: {$gt:0, $lt: 44}}]}]}",
"Root []\n"
"| | projections: \n"
"| | scan_0\n"
diff --git a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
index a1468c7e8e7..6d0708d0703 100644
--- a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
@@ -1440,12 +1440,12 @@ TEST(LogicalRewriter, SargableCE) {
" groupId: 1\n"
" | | Logical properties:\n"
" | | cardinalityEstimate: \n"
- " | | ce: 10\n"
+ " | | ce: 5.62341\n"
" | | requirementCEs: \n"
" | | refProjection: ptest, path: 'PathGet [a] PathIdentity []', ce: "
- "100\n"
+ "31.6228\n"
" | | refProjection: ptest, path: 'PathGet [b] PathIdentity []', ce: "
- "100\n"
+ "31.6228\n"
" | | projections: \n"
" | | ptest\n"
" | | indexingAvailability: \n"
@@ -1473,7 +1473,7 @@ TEST(LogicalRewriter, SargableCE) {
" groupId: 2\n"
" | | Logical properties:\n"
" | | cardinalityEstimate: \n"
- " | | ce: 10\n"
+ " | | ce: 5.62341\n"
" | | projections: \n"
" | | ptest\n"
" | | indexingAvailability: \n"
diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
index 6f0a2654909..fc7cbf49485 100644
--- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
@@ -40,6 +40,9 @@
namespace mongo::optimizer {
namespace {
+// Default selectivity of predicates used by HintedCE to force certain plans.
+constexpr double kDefaultSelectivity = 0.1;
+
TEST(PhysRewriter, PhysicalRewriterBasic) {
using namespace properties;
PrefixId prefixId;
@@ -1189,6 +1192,17 @@ TEST(PhysRewriter, FilterIndexing4) {
ABT scanNode = make<ScanNode>("root", "c1");
+ // TODO: SERVER-68006 will investigate why the plan changes without the hints.
+ PartialSchemaSelHints hints;
+ hints.emplace(PartialSchemaKey{"root", make<PathGet>("a", make<PathIdentity>())},
+ kDefaultSelectivity);
+ hints.emplace(PartialSchemaKey{"root", make<PathGet>("b", make<PathIdentity>())},
+ kDefaultSelectivity);
+ hints.emplace(PartialSchemaKey{"root", make<PathGet>("c", make<PathIdentity>())},
+ kDefaultSelectivity);
+ hints.emplace(PartialSchemaKey{"root", make<PathGet>("d", make<PathIdentity>())},
+ kDefaultSelectivity);
+
ABT evalNode = make<EvaluationNode>(
"pa",
make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>("root")),
@@ -1232,6 +1246,7 @@ TEST(PhysRewriter, FilterIndexing4) {
OptPhaseManager::OptPhase::MemoExplorationPhase,
OptPhaseManager::OptPhase::MemoImplementationPhase},
prefixId,
+ false /*requireRID*/,
{{{"c1",
ScanDefinition{
{},
@@ -1243,6 +1258,8 @@ TEST(PhysRewriter, FilterIndexing4) {
false /*isMultiKey*/,
{DistributionType::Centralized},
{}}}}}}}},
+ std::make_unique<HintedCE>(std::move(hints)),
+ std::make_unique<DefaultCosting>(),
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = std::move(rootNode);
@@ -1251,7 +1268,7 @@ TEST(PhysRewriter, FilterIndexing4) {
phaseManager.getHints()._disableHashJoinRIDIntersect = true;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(65, 80, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(65, 110, phaseManager.getMemo().getStats()._physPlanExplorationCount);
ASSERT_EXPLAIN_V2(
"Root []\n"
@@ -1342,7 +1359,7 @@ TEST(PhysRewriter, FilterIndexing5) {
ABT optimized = std::move(rootNode);
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(25, 55, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(25, 70, phaseManager.getMemo().getStats()._physPlanExplorationCount);
// We can cover both fields with the index, and need separate sort on "b".
ASSERT_EXPLAIN_V2(
@@ -1502,7 +1519,7 @@ TEST(PhysRewriter, FilterIndexingStress) {
// Without the changes to restrict SargableNode split to which this test is tied, we would
// be exploring 2^kFilterCount plans, one for each created group.
- ASSERT_EQ(51, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_EQ(55, phaseManager.getMemo().getStats()._physPlanExplorationCount);
const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized);
ASSERT_BSON_PATH("\"Filter\"", explainRoot, "child.nodeType");
@@ -1554,6 +1571,14 @@ TEST(PhysRewriter, FilterIndexingVariable) {
ABT scanNode = make<ScanNode>("root", "c1");
+ // TODO: SERVER-68006 will investigate why the plan changes without the hints.
+ PartialSchemaSelHints hints;
+ hints.emplace(PartialSchemaKey{"root",
+ make<PathGet>("a",
+ make<PathTraverse>(make<PathIdentity>(),
+ PathTraverse::kSingleLevel))},
+ kDefaultSelectivity);
+
// Encode a condition using two query parameters (expressed as functions):
// "a" > param_0 AND "a" >= param_1 (observe param_1 comparison is inclusive).
ABT filterNode = make<FilterNode>(
@@ -1575,11 +1600,14 @@ TEST(PhysRewriter, FilterIndexingVariable) {
OptPhaseManager::OptPhase::MemoExplorationPhase,
OptPhaseManager::OptPhase::MemoImplementationPhase},
prefixId,
+ false /*requireRID*/,
{{{"c1",
ScanDefinition{
{},
{{"index1",
makeIndexDefinition("a", CollationOp::Ascending, false /*isMultiKey*/)}}}}}},
+ std::make_unique<HintedCE>(std::move(hints)),
+ std::make_unique<DefaultCosting>(),
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = std::move(rootNode);
@@ -1724,7 +1752,7 @@ TEST(PhysRewriter, FilterReorder) {
make<PathGet>(projName,
make<PathTraverse>(make<PathIdentity>(),
PathTraverse::kSingleLevel))},
- 0.1 * (kFilterCount - i));
+ kDefaultSelectivity * (kFilterCount - i));
result = make<FilterNode>(
make<EvalFilter>(make<PathGet>(std::move(projName),
make<PathTraverse>(make<PathCompare>(Operations::Eq,
@@ -2086,6 +2114,12 @@ TEST(PhysRewriter, MultiKeyIndex) {
using namespace properties;
PrefixId prefixId;
+ PartialSchemaSelHints hints;
+ hints.emplace(PartialSchemaKey{"root", make<PathGet>("a", make<PathIdentity>())},
+ kDefaultSelectivity);
+ hints.emplace(PartialSchemaKey{"root", make<PathGet>("b", make<PathIdentity>())},
+ kDefaultSelectivity);
+
ABT scanNode = make<ScanNode>("root", "c1");
ABT evalANode = make<EvaluationNode>(
@@ -2120,12 +2154,15 @@ TEST(PhysRewriter, MultiKeyIndex) {
OptPhaseManager::OptPhase::MemoExplorationPhase,
OptPhaseManager::OptPhase::MemoImplementationPhase},
prefixId,
+ false /*requireRID*/,
{{{"c1",
ScanDefinition{
{},
{{"index1", makeIndexDefinition("a", CollationOp::Ascending, false /*isMultiKey*/)},
{"index2",
makeIndexDefinition("b", CollationOp::Descending, false /*isMultiKey*/)}}}}}},
+ std::make_unique<HintedCE>(std::move(hints)),
+ std::make_unique<DefaultCosting>(),
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
{
@@ -2135,7 +2172,7 @@ TEST(PhysRewriter, MultiKeyIndex) {
phaseManager.getHints()._disableHashJoinRIDIntersect = true;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(15, 25, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(13, 25, phaseManager.getMemo().getStats()._physPlanExplorationCount);
// GroupBy+Union cannot propagate collation requirement, and we need a separate
// CollationNode.
@@ -2355,7 +2392,7 @@ TEST(PhysRewriter, CompoundIndex1) {
ABT optimized = rootNode;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(60, 110, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(60, 130, phaseManager.getMemo().getStats()._physPlanExplorationCount);
const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized);
ASSERT_BSON_PATH("\"BinaryJoin\"", explainRoot, "child.nodeType");
@@ -2443,7 +2480,7 @@ TEST(PhysRewriter, CompoundIndex2) {
ABT optimized = rootNode;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(100, 170, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(100, 210, phaseManager.getMemo().getStats()._physPlanExplorationCount);
const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized);
ASSERT_BSON_PATH("\"BinaryJoin\"", explainRoot, "child.nodeType");
@@ -2527,7 +2564,7 @@ TEST(PhysRewriter, CompoundIndex3) {
ABT optimized = rootNode;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(70, 110, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(70, 130, phaseManager.getMemo().getStats()._physPlanExplorationCount);
ASSERT_EXPLAIN_V2(
"Root []\n"
@@ -2932,6 +2969,18 @@ TEST(PhysRewriter, IndexBoundsIntersect3) {
using namespace properties;
PrefixId prefixId;
+ PartialSchemaSelHints hints;
+ hints.emplace(
+ PartialSchemaKey{
+ "root",
+ make<PathGet>(
+ "a",
+ make<PathTraverse>(
+ make<PathGet>(
+ "b", make<PathTraverse>(make<PathIdentity>(), PathTraverse::kSingleLevel)),
+ PathTraverse::kSingleLevel))},
+ kDefaultSelectivity);
+
ABT scanNode = make<ScanNode>("root", "c1");
ABT filterNode = make<FilterNode>(
@@ -2960,6 +3009,7 @@ TEST(PhysRewriter, IndexBoundsIntersect3) {
OptPhaseManager::OptPhase::MemoExplorationPhase,
OptPhaseManager::OptPhase::MemoImplementationPhase},
prefixId,
+ false /*requireRID*/,
{{{"c1",
ScanDefinition{
{},
@@ -2967,6 +3017,8 @@ TEST(PhysRewriter, IndexBoundsIntersect3) {
IndexDefinition{{{makeIndexPath(FieldPathType{"a", "b"}, true /*isMultiKey*/),
CollationOp::Ascending}},
true /*isMultiKey*/}}}}}}},
+ std::make_unique<HintedCE>(std::move(hints)),
+ std::make_unique<DefaultCosting>(),
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = rootNode;
@@ -3159,10 +3211,10 @@ TEST(PhysRewriter, IndexResidualReq) {
// Make sure we can use the index to cover "b" while testing "b.c" with a separate filter.
ASSERT_EXPLAIN_PROPS_V2(
- "Properties [cost: 0.070002, localCost: 0, adjustedCE: 10]\n"
+ "Properties [cost: 0.402121, localCost: 0, adjustedCE: 189.571]\n"
"| | Logical:\n"
"| | cardinalityEstimate: \n"
- "| | ce: 10\n"
+ "| | ce: 189.571\n"
"| | projections: \n"
"| | pa\n"
"| | root\n"
@@ -3183,14 +3235,14 @@ TEST(PhysRewriter, IndexResidualReq) {
"| | pa\n"
"| RefBlock: \n"
"| Variable [pa]\n"
- "Properties [cost: 0.070002, localCost: 0.070002, adjustedCE: 10]\n"
+ "Properties [cost: 0.402121, localCost: 0.402121, adjustedCE: 189.571]\n"
"| | Logical:\n"
"| | cardinalityEstimate: \n"
- "| | ce: 10\n"
+ "| | ce: 189.571\n"
"| | requirementCEs: \n"
- "| | refProjection: root, path: 'PathGet [a] PathIdentity []', ce: 100\n"
+ "| | refProjection: root, path: 'PathGet [a] PathIdentity []', ce: 330\n"
"| | refProjection: root, path: 'PathGet [b] PathGet [c] PathIdentity []', "
- "ce: 100\n"
+ "ce: 330\n"
"| | projections: \n"
"| | pa\n"
"| | root\n"
@@ -3286,7 +3338,7 @@ TEST(PhysRewriter, IndexResidualReq1) {
ABT optimized = rootNode;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(25, 30, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(25, 45, phaseManager.getMemo().getStats()._physPlanExplorationCount);
// Prefer index1 over index2 and index3 in order to cover all fields.
ASSERT_EXPLAIN_V2(
@@ -3322,6 +3374,19 @@ TEST(PhysRewriter, IndexResidualReq2) {
ABT scanNode = make<ScanNode>("root", "c1");
+ // TODO: SERVER-68006 will investigate why the plan changes without the hints.
+ PartialSchemaSelHints hints;
+ hints.emplace(PartialSchemaKey{"root",
+ make<PathGet>("a",
+ make<PathTraverse>(make<PathIdentity>(),
+ PathTraverse::kSingleLevel))},
+ kDefaultSelectivity);
+ hints.emplace(PartialSchemaKey{"root",
+ make<PathGet>("b",
+ make<PathTraverse>(make<PathIdentity>(),
+ PathTraverse::kSingleLevel))},
+ kDefaultSelectivity);
+
ABT filterANode = make<FilterNode>(
make<EvalFilter>(
make<PathGet>("a",
@@ -3346,6 +3411,7 @@ TEST(PhysRewriter, IndexResidualReq2) {
OptPhaseManager::OptPhase::MemoExplorationPhase,
OptPhaseManager::OptPhase::MemoImplementationPhase},
prefixId,
+ false /*requireRID*/,
{{{"c1",
ScanDefinition{{},
{{"index1",
@@ -3353,6 +3419,8 @@ TEST(PhysRewriter, IndexResidualReq2) {
{{"a", CollationOp::Ascending, true /*isMultiKey*/},
{"c", CollationOp::Ascending, true /*isMultiKey*/},
{"b", CollationOp::Ascending, true /*isMultiKey*/}})}}}}}},
+ std::make_unique<HintedCE>(std::move(hints)),
+ std::make_unique<DefaultCosting>(),
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = rootNode;
@@ -3862,6 +3930,15 @@ TEST(PhysRewriter, IndexPartitioning) {
ABT scanNode = make<ScanNode>("root", "c1");
+ // TODO: SERVER-68006 This test results in an failed assert when cardinality estimates change.
+ // The assert is in physical_rewriter.cpp:
+ // "Must optimize successfully if found compatible properties!"
+ PartialSchemaSelHints hints;
+ hints.emplace(PartialSchemaKey{"root", make<PathGet>("a", make<PathIdentity>())},
+ kDefaultSelectivity);
+ hints.emplace(PartialSchemaKey{"root", make<PathGet>("b", make<PathIdentity>())},
+ kDefaultSelectivity);
+
ABT projectionANode = make<EvaluationNode>(
"pa",
make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>("root")),
@@ -3895,6 +3972,7 @@ TEST(PhysRewriter, IndexPartitioning) {
OptPhaseManager::OptPhase::MemoExplorationPhase,
OptPhaseManager::OptPhase::MemoImplementationPhase},
prefixId,
+ false /*requireRID*/,
{{{"c1",
ScanDefinition{
{},
@@ -3906,6 +3984,8 @@ TEST(PhysRewriter, IndexPartitioning) {
{}}}},
{DistributionType::HashPartitioning, makeSeq(makeNonMultikeyIndexPath("b"))}}}},
5 /*numberOfPartitions*/},
+ std::make_unique<HintedCE>(std::move(hints)),
+ std::make_unique<DefaultCosting>(),
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = rootNode;
@@ -3973,6 +4053,18 @@ TEST(PhysRewriter, IndexPartitioning1) {
ABT scanNode = make<ScanNode>("root", "c1");
+ // TODO: SERVER-68006 This test results in an failed assert when cardinality estimates change.
+ // The assert is in physical_rewriter.cpp:
+ // "Must optimize successfully if found compatible properties!"
+ // The interesting thing is that it doesn't fail on each test run, but sometimes, which
+ // makes one think the failure depends on rounding errors of cost/cardinality estimates,
+ // which in turn results in different sub-plans.
+ PartialSchemaSelHints hints;
+ hints.emplace(PartialSchemaKey{"root", make<PathGet>("a", make<PathIdentity>())},
+ kDefaultSelectivity);
+ hints.emplace(PartialSchemaKey{"root", make<PathGet>("b", make<PathIdentity>())},
+ kDefaultSelectivity);
+
ABT projectionANode = make<EvaluationNode>(
"pa",
make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>("root")),
@@ -4006,6 +4098,7 @@ TEST(PhysRewriter, IndexPartitioning1) {
OptPhaseManager::OptPhase::MemoExplorationPhase,
OptPhaseManager::OptPhase::MemoImplementationPhase},
prefixId,
+ false /*requireRID*/,
{{{"c1",
ScanDefinition{
{},
@@ -4023,6 +4116,8 @@ TEST(PhysRewriter, IndexPartitioning1) {
{}}}},
{DistributionType::HashPartitioning, makeSeq(makeNonMultikeyIndexPath("c"))}}}},
5 /*numberOfPartitions*/},
+ std::make_unique<HintedCE>(std::move(hints)),
+ std::make_unique<DefaultCosting>(),
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = rootNode;
diff --git a/src/mongo/db/query/ce/ce_math.cpp b/src/mongo/db/query/optimizer/utils/ce_math.cpp
index a5dd7bf04de..b56adb4d860 100644
--- a/src/mongo/db/query/ce/ce_math.cpp
+++ b/src/mongo/db/query/optimizer/utils/ce_math.cpp
@@ -27,19 +27,31 @@
* it in the license file.
*/
-#include <algorithm>
-#include <cmath>
-#include <functional>
+#include <algorithm> // std::sort
+#include <cmath> // std::pow
+#include <functional> // std::greater
-#include "mongo/db/query/ce/ce_math.h"
+#include "mongo/db/query/optimizer/utils/ce_math.h"
namespace mongo::ce {
-double conjExponentialBackoff(std::vector<double> conjSelectivities) {
- std::sort(conjSelectivities.begin(), conjSelectivities.end());
- double sel = conjSelectivities[0];
- double f = 1.0;
+
+bool validSelectivity(SelectivityType sel) {
+ return (sel >= 0.0 && sel <= 1.0);
+}
+
+bool validCardinality(CEType card) {
+ return (card >= kMinCard && card <= std::numeric_limits<CEType>::max());
+}
+
+SelectivityType conjExponentialBackoff(std::vector<SelectivityType> conjSelectivities) {
+ size_t actualMaxBackoffElements = std::min(conjSelectivities.size(), kMaxBackoffElements);
+ std::partial_sort(conjSelectivities.begin(),
+ conjSelectivities.begin() + actualMaxBackoffElements,
+ conjSelectivities.end());
+ SelectivityType sel = conjSelectivities[0];
+ SelectivityType f = 1.0;
size_t i = 1;
- while (i < conjSelectivities.size() && i <= kMaxBackoffIterations) {
+ while (i < actualMaxBackoffElements) {
f /= 2.0;
sel *= std::pow(conjSelectivities[i], f);
i++;
@@ -47,12 +59,16 @@ double conjExponentialBackoff(std::vector<double> conjSelectivities) {
return sel;
}
-double disjExponentialBackoff(std::vector<double> disjSelectivities) {
- std::sort(disjSelectivities.begin(), disjSelectivities.end(), std::greater<double>());
- double sel = 1.0 - disjSelectivities[0];
- double f = 1.0;
+SelectivityType disjExponentialBackoff(std::vector<SelectivityType> disjSelectivities) {
+ size_t actualMaxBackoffElements = std::min(disjSelectivities.size(), kMaxBackoffElements);
+ std::partial_sort(disjSelectivities.begin(),
+ disjSelectivities.begin() + actualMaxBackoffElements,
+ disjSelectivities.end(),
+ std::greater<SelectivityType>());
+ SelectivityType sel = 1.0 - disjSelectivities[0];
+ SelectivityType f = 1.0;
size_t i = 1;
- while (i < disjSelectivities.size() && i <= kMaxBackoffIterations) {
+ while (i < actualMaxBackoffElements) {
f /= 2.0;
sel *= std::pow(1 - disjSelectivities[i], f);
i++;
diff --git a/src/mongo/db/query/ce/ce_math.h b/src/mongo/db/query/optimizer/utils/ce_math.h
index 3ef529a9224..a29a7d1f34b 100644
--- a/src/mongo/db/query/ce/ce_math.h
+++ b/src/mongo/db/query/optimizer/utils/ce_math.h
@@ -29,24 +29,43 @@
#pragma once
+#include <limits>
#include <vector>
+#include "mongo/db/query/optimizer/defs.h"
+
namespace mongo::ce {
+using namespace mongo::optimizer;
+
+// Default cardinality when actual collection cardinality is unknown.
+// Mostly used by unit tests.
+constexpr CEType kDefaultCard = 1000.00;
+
+// Minimum estimated cardinality. In the absense of any statistics we can never
+// assume there are less than this many matching documents.
+constexpr CEType kMinCard = 0.01;
+
/**
- * Specifies the maximum number of iterations to use when estimating via exponential backoff.
+ * Specifies the maximum number of elements (selectivities) to use when estimating via
+ * exponential backoff.
*/
-const size_t kMaxBackoffIterations = 3;
+const size_t kMaxBackoffElements = 4;
+
+
+bool validSelectivity(SelectivityType sel);
+
+bool validCardinality(CEType card);
/**
* Estimates the selectivity of a conjunction given the selectivities of its subexpressions using
* exponential backoff.
*/
-double conjExponentialBackoff(std::vector<double> conjSelectivities);
+SelectivityType conjExponentialBackoff(std::vector<SelectivityType> conjSelectivities);
/**
* Estimates the selectivity of a disjunction given the selectivities of its subexpressions using
* exponential backoff.
*/
-double disjExponentialBackoff(std::vector<double> disjSelectivities);
+SelectivityType disjExponentialBackoff(std::vector<SelectivityType> disjSelectivities);
} // namespace mongo::ce