author    David Percy <david.percy@mongodb.com>  2023-03-14 16:00:26 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>  2023-03-30 21:53:55 +0000
commit    187851fdfda231e63a13b1189a8f8566c57e701a (patch)
tree      856363f5466fd8167a2af8262b02f1b82122dbd6 /src/mongo/db/query
parent    6afa8dd2bb3435e3632375a545d8a2c76a1cd620 (diff)
download  mongo-187851fdfda231e63a13b1189a8f8566c57e701a.tar.gz
SERVER-74539 Enable disjunction in PSR conversion
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r--  src/mongo/db/query/ce/test_utils.h                                 |   2
-rw-r--r--  src/mongo/db/query/optimizer/SConscript                            |   1
-rw-r--r--  src/mongo/db/query/optimizer/bool_expression.h                     |   4
-rw-r--r--  src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp         |   8
-rw-r--r--  src/mongo/db/query/optimizer/index_union_optimizer_test.cpp        | 594
-rw-r--r--  src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp   |  58
-rw-r--r--  src/mongo/db/query/optimizer/node.cpp                              |   9
-rw-r--r--  src/mongo/db/query/optimizer/partial_schema_requirements.cpp       |  31
-rw-r--r--  src/mongo/db/query/optimizer/partial_schema_requirements.h         |  26
-rw-r--r--  src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp  | 185
-rw-r--r--  src/mongo/db/query/optimizer/utils/utils.cpp                       | 294
-rw-r--r--  src/mongo/db/query/optimizer/utils/utils.h                         |   3
12 files changed, 797 insertions, 418 deletions
diff --git a/src/mongo/db/query/ce/test_utils.h b/src/mongo/db/query/ce/test_utils.h
index a894be82783..3a9c7f9c78e 100644
--- a/src/mongo/db/query/ce/test_utils.h
+++ b/src/mongo/db/query/ce/test_utils.h
@@ -116,7 +116,7 @@ bool isSargableNode(const ABT& n) {
// for a SargableNode with a specific number of predicates. For tests, we only care about
// verifying the cardinality of that one.
if (auto* sargable = n.cast<optimizer::SargableNode>()) {
- return sargable->getReqMap().numLeaves() == NumReq;
+ return PSRExpr::numLeaves(sargable->getReqMap().getRoot()) == NumReq;
}
return false;
}
diff --git a/src/mongo/db/query/optimizer/SConscript b/src/mongo/db/query/optimizer/SConscript
index f9090848d4f..2abdccf1f04 100644
--- a/src/mongo/db/query/optimizer/SConscript
+++ b/src/mongo/db/query/optimizer/SConscript
@@ -130,6 +130,7 @@ env.CppUnitTest(
target='optimizer_test',
source=[
"bool_expression_test.cpp",
+ "index_union_optimizer_test.cpp",
"logical_rewriter_optimizer_test.cpp",
"optimizer_test.cpp",
"physical_rewriter_optimizer_test.cpp",
diff --git a/src/mongo/db/query/optimizer/bool_expression.h b/src/mongo/db/query/optimizer/bool_expression.h
index 7e0371041b3..fb5a851781c 100644
--- a/src/mongo/db/query/optimizer/bool_expression.h
+++ b/src/mongo/db/query/optimizer/bool_expression.h
@@ -146,6 +146,10 @@ struct BoolExpr {
return {};
}
+ static bool isSingularDNF(const Node& n) {
+ return getSingularDNF(n).has_value();
+ }
+
using ChildVisitor = std::function<void(Node& child, const size_t childIndex)>;
using ChildVisitorConst = std::function<void(const Node& child, const size_t childIndex)>;
using AtomVisitor = std::function<void(T& expr)>;
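A "singular" DNF here means a disjunction containing exactly one conjunction, which in turn holds exactly one atom; the new isSingularDNF helper simply checks whether getSingularDNF returns a value. A hedged sketch of how a caller uses it (this mirrors the PathTraverse handling in utils.cpp further down; reqMap and retainPredicate are illustrative names):

    // Sketch only: fall back to over-approximation when the requirements are
    // not a single atom, since Traverse cannot be pushed over a conjunction exactly.
    if (!PSRExpr::isSingularDNF(reqMap.getRoot())) {
        retainPredicate = true;  // keep the original predicate as well
    }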
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
index d0d02208bc1..17052b694c4 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
@@ -566,7 +566,7 @@ static boost::optional<ABT> mergeSargableNodes(
return createEmptyValueScanNode(ctx);
}
- if (mergedReqs.numLeaves() > SargableNode::kMaxPartialSchemaReqs) {
+ if (PSRExpr::numLeaves(mergedReqs.getRoot()) > SargableNode::kMaxPartialSchemaReqs) {
return {};
}
@@ -678,7 +678,7 @@ static void convertFilterToSargableNode(ABT::reference_type node,
addEmptyValueScanNode(ctx);
return;
}
- if (conversion->_reqMap.numLeaves() > SargableNode::kMaxPartialSchemaReqs) {
+ if (PSRExpr::numLeaves(conversion->_reqMap.getRoot()) > SargableNode::kMaxPartialSchemaReqs) {
// Too many requirements.
return;
}
@@ -1089,7 +1089,7 @@ struct SubstituteConvert<EvaluationNode> {
uassert(6624165,
"Should not be getting retainPredicate set for EvalNodes",
!conversion->_retainPredicate);
- if (conversion->_reqMap.numLeaves() != 1) {
+ if (PSRExpr::numLeaves(conversion->_reqMap.getRoot()) != 1) {
// For evaluation nodes we expect to create a single entry.
return;
}
@@ -1344,7 +1344,7 @@ struct ExploreConvert<SargableNode> {
// try having at least one predicate on the left (mask = 1), and we try all possible
// subsets. For index intersection however (isIndex = true), we try symmetric partitioning
// (thus the high bound is 2^(N-1)).
- const size_t reqSize = reqMap.numConjuncts();
+ const size_t reqSize = PSRExpr::numLeaves(reqMap.getRoot()); // assumes singular DNF
const size_t highMask = isIndex ? (1ull << (reqSize - 1)) : (1ull << reqSize);
for (size_t mask = 1; mask < highMask; mask++) {
SplitRequirementsResult splitResult = splitRequirements(
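The loop bounds above implement the subset enumeration described in the preceding comment. A small self-contained sketch of the same enumeration, assuming illustrative values for reqSize and isIndex (the real rewrite partitions PartialSchemaRequirements; here the loop only decides which side each leaf would go to):

    #include <cstddef>

    int main() {
        const std::size_t reqSize = 3;  // number of predicate leaves (illustrative)
        const bool isIndex = true;      // index intersection: symmetric partitioning
        const std::size_t highMask = isIndex ? (1ull << (reqSize - 1)) : (1ull << reqSize);
        for (std::size_t mask = 1; mask < highMask; mask++) {
            for (std::size_t i = 0; i < reqSize; i++) {
                const bool onLeft = (mask & (1ull << i)) != 0;
                (void)onLeft;  // the real code routes leaf i to the left or right split here
            }
        }
        return 0;
    }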
diff --git a/src/mongo/db/query/optimizer/index_union_optimizer_test.cpp b/src/mongo/db/query/optimizer/index_union_optimizer_test.cpp
new file mode 100644
index 00000000000..b3cc24fe192
--- /dev/null
+++ b/src/mongo/db/query/optimizer/index_union_optimizer_test.cpp
@@ -0,0 +1,594 @@
+/**
+ * Copyright (C) 2023-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/pipeline/abt/utils.h"
+#include "mongo/db/query/optimizer/cascades/logical_props_derivation.h"
+#include "mongo/db/query/optimizer/cascades/rewriter_rules.h"
+#include "mongo/db/query/optimizer/explain.h"
+#include "mongo/db/query/optimizer/metadata_factory.h"
+#include "mongo/db/query/optimizer/node.h"
+#include "mongo/db/query/optimizer/opt_phase_manager.h"
+#include "mongo/db/query/optimizer/rewrites/const_eval.h"
+#include "mongo/db/query/optimizer/utils/unit_test_abt_literals.h"
+#include "mongo/db/query/optimizer/utils/unit_test_utils.h"
+#include "mongo/unittest/unittest.h"
+
+using namespace mongo::optimizer::unit_test_abt_literals;
+
+namespace mongo::optimizer {
+namespace {
+
+TEST(LogicalRewriter, MakeSargableNodeWithTopLevelDisjunction) {
+ // Hand-build SargableNode with top-level disjunction.
+ auto req = PartialSchemaRequirement(
+ boost::none, _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))), false);
+
+ auto makeKey = [](std::string pathName) {
+ return PartialSchemaKey("ptest",
+ make<PathGet>(FieldNameType{pathName}, make<PathIdentity>()));
+ };
+ PSRExpr::Builder builder;
+ builder.pushDisj()
+ .pushConj()
+ .atom({makeKey("a"), req})
+ .atom({makeKey("b"), req})
+ .pop()
+ .pushConj()
+ .atom({makeKey("c"), req})
+ .atom({makeKey("d"), req})
+ .pop();
+ auto reqs = PartialSchemaRequirements(builder.finish().get());
+
+ ABT scanNode = make<ScanNode>("ptest", "test");
+ ABT sargableNode = make<SargableNode>(
+ reqs, CandidateIndexes(), boost::none, IndexReqTarget::Index, std::move(scanNode));
+ ABT rootNode = make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"ptest"}},
+ std::move(sargableNode));
+ ASSERT_EXPLAIN_V2_AUTO(
+ "Root [{ptest}]\n"
+ "Sargable [Index]\n"
+ "| | | requirements: \n"
+ "| | | {\n"
+ "| | | {\n"
+ "| | | {refProjection: ptest, path: 'PathGet [a] PathIdentity []', "
+ "intervals: {{{=Const [1]}}}}\n"
+ "| | | ^ \n"
+ "| | | {refProjection: ptest, path: 'PathGet [b] PathIdentity []', "
+ "intervals: {{{=Const [1]}}}}\n"
+ "| | | }\n"
+ "| | | U \n"
+ "| | | {\n"
+ "| | | {refProjection: ptest, path: 'PathGet [c] PathIdentity []', "
+ "intervals: {{{=Const [1]}}}}\n"
+ "| | | ^ \n"
+ "| | | {refProjection: ptest, path: 'PathGet [d] PathIdentity []', "
+ "intervals: {{{=Const [1]}}}}\n"
+ "| | | }\n"
+ "| | | }\n"
+ "| | candidateIndexes: \n"
+ "Scan [test, {ptest}]\n",
+ rootNode);
+
+ // Show that hashing a top-level disjunction doesn't throw.
+ ABTHashGenerator::generate(rootNode);
+}
+
+TEST(LogicalRewriter, ToplevelDisjunctionConversion) {
+ // When we have a Filter with a top-level disjunction,
+ // it gets translated to a Sargable node with top-level disjunction.
+
+ // {$or: [ {a: 2}, {b: 3} ]}
+ ABT rootNode = NodeBuilder{}
+ .root("scan_0")
+ .filter(_evalf(_composea(_get("a", _cmp("Eq", "2"_cint64)),
+ _get("b", _cmp("Eq", "3"_cint64))),
+ "scan_0"_var))
+ .finish(_scan("scan_0", "coll"));
+
+ auto prefixId = PrefixId::createForTests();
+ auto phaseManager = makePhaseManager({OptPhase::MemoSubstitutionPhase},
+ prefixId,
+ {{{"coll", createScanDef({}, {})}}},
+ boost::none /*costModel*/,
+ DebugInfo::kDefaultForTests,
+ QueryHints{});
+
+ ABT optimized = rootNode;
+ phaseManager.optimize(optimized);
+
+ ASSERT_EXPLAIN_V2_AUTO(
+ "Root [{scan_0}]\n"
+ "Sargable [Complete]\n"
+ "| | | | requirements: \n"
+ "| | | | {\n"
+ "| | | | {{refProjection: scan_0, path: 'PathGet [a] PathIdentity []', "
+ "intervals: {{{=Const [2]}}}}}\n"
+ "| | | | U \n"
+ "| | | | {{refProjection: scan_0, path: 'PathGet [b] PathIdentity []', "
+ "intervals: {{{=Const [3]}}}}}\n"
+ "| | | | }\n"
+ "| | | candidateIndexes: \n"
+ "| | scanParams: \n"
+ "| | {'a': evalTemp_0, 'b': evalTemp_1}\n"
+ "| | residualReqs: \n"
+ "| | {\n"
+ "| | {{refProjection: evalTemp_0, path: 'PathIdentity []', intervals: "
+ "{{{=Const [2]}}}, entryIndex: 0}}\n"
+ "| | U \n"
+ "| | {{refProjection: evalTemp_1, path: 'PathIdentity []', intervals: "
+ "{{{=Const [3]}}}, entryIndex: 1}}\n"
+ "| | }\n"
+ "Scan [coll, {scan_0}]\n",
+ optimized);
+}
+
+TEST(LogicalRewriter, ToplevelNestedDisjunctionConversion) {
+ // When we have a Filter with a top-level disjunction,
+ // it gets translated to a Sargable node with top-level disjunction,
+ // even if it's a nested disjunction.
+
+ // {$or: [{$or: [{a: 2}, {b: 3}]}, {$or: [{c: 4}, {d: 5}]}]}
+ ABT rootNode = NodeBuilder{}
+ .root("scan_0")
+ .filter(_evalf(_composea(_composea(_get("a", _cmp("Eq", "2"_cint64)),
+ _get("b", _cmp("Eq", "3"_cint64))),
+ _composea(_get("c", _cmp("Eq", "4"_cint64)),
+ _get("d", _cmp("Eq", "5"_cint64)))),
+ "scan_0"_var))
+ .finish(_scan("scan_0", "coll"));
+
+ auto prefixId = PrefixId::createForTests();
+ auto phaseManager = makePhaseManager({OptPhase::MemoSubstitutionPhase},
+ prefixId,
+ {{{"coll", createScanDef({}, {})}}},
+ boost::none /*costModel*/,
+ DebugInfo::kDefaultForTests,
+ QueryHints{});
+
+ ABT optimized = rootNode;
+ phaseManager.optimize(optimized);
+
+ ASSERT_EXPLAIN_V2_AUTO(
+ "Root [{scan_0}]\n"
+ "Sargable [Complete]\n"
+ "| | | | requirements: \n"
+ "| | | | {\n"
+ "| | | | {{refProjection: scan_0, path: 'PathGet [a] PathIdentity []', "
+ "intervals: {{{=Const [2]}}}}}\n"
+ "| | | | U \n"
+ "| | | | {{refProjection: scan_0, path: 'PathGet [b] PathIdentity []', "
+ "intervals: {{{=Const [3]}}}}}\n"
+ "| | | | U \n"
+ "| | | | {{refProjection: scan_0, path: 'PathGet [c] PathIdentity []', "
+ "intervals: {{{=Const [4]}}}}}\n"
+ "| | | | U \n"
+ "| | | | {{refProjection: scan_0, path: 'PathGet [d] PathIdentity []', "
+ "intervals: {{{=Const [5]}}}}}\n"
+ "| | | | }\n"
+ "| | | candidateIndexes: \n"
+ "| | scanParams: \n"
+ "| | {'a': evalTemp_0, 'b': evalTemp_1, 'c': evalTemp_2, 'd': evalTemp_3}\n"
+ "| | residualReqs: \n"
+ "| | {\n"
+ "| | {{refProjection: evalTemp_0, path: 'PathIdentity []', intervals: "
+ "{{{=Const [2]}}}, entryIndex: 0}}\n"
+ "| | U \n"
+ "| | {{refProjection: evalTemp_1, path: 'PathIdentity []', intervals: "
+ "{{{=Const [3]}}}, entryIndex: 1}}\n"
+ "| | U \n"
+ "| | {{refProjection: evalTemp_2, path: 'PathIdentity []', intervals: "
+ "{{{=Const [4]}}}, entryIndex: 2}}\n"
+ "| | U \n"
+ "| | {{refProjection: evalTemp_3, path: 'PathIdentity []', intervals: "
+ "{{{=Const [5]}}}, entryIndex: 3}}\n"
+ "| | }\n"
+ "Scan [coll, {scan_0}]\n",
+ optimized);
+}
+
+TEST(LogicalRewriter, ComplexBooleanConversion) {
+
+ auto leaf0 = _get("a", _cmp("Eq", "0"_cint64));
+ auto leaf1 = _get("b", _cmp("Eq", "1"_cint64));
+ auto leaf2 = _get("c", _cmp("Eq", "2"_cint64));
+ auto leaf3 = _get("d", _cmp("Eq", "3"_cint64));
+ auto leaf4 = _get("e", _cmp("Eq", "4"_cint64));
+ auto leaf5 = _get("f", _cmp("Eq", "5"_cint64));
+ auto path = _composem(
+ leaf0, _composea(leaf1, _composem(leaf2, _composea(leaf3, _composem(leaf4, leaf5)))));
+ ABT rootNode = NodeBuilder{}
+ .root("scan_0")
+ .filter(_evalf(path, "scan_0"_var))
+ .finish(_scan("scan_0", "coll"));
+
+ auto prefixId = PrefixId::createForTests();
+ auto phaseManager = makePhaseManager({OptPhase::MemoSubstitutionPhase},
+ prefixId,
+ {{{"coll", createScanDef({}, {})}}},
+ boost::none /*costModel*/,
+ DebugInfo::kDefaultForTests,
+ QueryHints{});
+
+ ABT optimized = rootNode;
+ phaseManager.optimize(optimized);
+
+ // For now PSR conversion fails because the result would not be DNF.
+ ASSERT_EXPLAIN_V2_AUTO(
+ "Root [{scan_0}]\n"
+ "Filter []\n"
+ "| EvalFilter []\n"
+ "| | Variable [scan_0]\n"
+ "| PathComposeA []\n"
+ "| | PathComposeM []\n"
+ "| | | PathComposeA []\n"
+ "| | | | PathComposeM []\n"
+ "| | | | | PathGet [f]\n"
+ "| | | | | PathCompare [Eq]\n"
+ "| | | | | Const [5]\n"
+ "| | | | PathGet [e]\n"
+ "| | | | PathCompare [Eq]\n"
+ "| | | | Const [4]\n"
+ "| | | PathGet [d]\n"
+ "| | | PathCompare [Eq]\n"
+ "| | | Const [3]\n"
+ "| | PathGet [c]\n"
+ "| | PathCompare [Eq]\n"
+ "| | Const [2]\n"
+ "| PathGet [b]\n"
+ "| PathCompare [Eq]\n"
+ "| Const [1]\n"
+ "Sargable [Complete]\n"
+ "| | | | requirements: \n"
+ "| | | | {{{refProjection: scan_0, path: 'PathGet [a] PathIdentity []', "
+ "intervals: {{{=Const [0]}}}}}}\n"
+ "| | | candidateIndexes: \n"
+ "| | scanParams: \n"
+ "| | {'a': evalTemp_0}\n"
+ "| | residualReqs: \n"
+ "| | {{{refProjection: evalTemp_0, path: 'PathIdentity []', intervals: "
+ "{{{=Const [0]}}}, entryIndex: 0}}}\n"
+ "Scan [coll, {scan_0}]\n",
+ optimized);
+}
+
+TEST(LogicalRewriter, DisjunctionProjectionConversion) {
+
+ auto leaf0 = _get("a", _cmp("Eq", "0"_cint64));
+ auto leaf1 = _get("b", _cmp("Eq", "1"_cint64));
+ auto path = _composea(leaf0, leaf1);
+ ABT rootNode = NodeBuilder{}
+ .root("doc")
+ .eval("doc", _evalp(_keep(FieldNameType{"x"}), "scan_0"_var))
+ .filter(_evalf(path, "scan_0"_var))
+ .finish(_scan("scan_0", "coll"));
+
+ auto prefixId = PrefixId::createForTests();
+ auto phaseManager = makePhaseManager({OptPhase::MemoSubstitutionPhase},
+ prefixId,
+ {{{"coll", createScanDef({}, {})}}},
+ boost::none /*costModel*/,
+ DebugInfo::kDefaultForTests,
+ QueryHints{});
+
+ ABT optimized = rootNode;
+ phaseManager.optimize(optimized);
+
+ // We get two Sargable nodes, but they aren't combined, because converting to DNF would
+ // distribute the projection into both disjuncts, and for now we don't want to have
+ // projections inside a (nontrivial) disjunction.
+ ASSERT_EXPLAIN_V2_AUTO(
+ "Root [{doc}]\n"
+ "Evaluation [{doc}]\n"
+ "| EvalPath []\n"
+ "| | Const [{}]\n"
+ "| PathField [x]\n"
+ "| PathConstant []\n"
+ "| Variable [fieldProj_0]\n"
+ "Sargable [Complete]\n"
+ "| | | | requirements: \n"
+ "| | | | {{{refProjection: scan_0, path: 'PathGet [x] PathIdentity []', "
+ "boundProjection: fieldProj_0, intervals: {{{<fully open>}}}}}}\n"
+ "| | | candidateIndexes: \n"
+ "| | scanParams: \n"
+ "| | {'x': fieldProj_0}\n"
+ "Sargable [Complete]\n"
+ "| | | | requirements: \n"
+ "| | | | {\n"
+ "| | | | {{refProjection: scan_0, path: 'PathGet [a] PathIdentity []', "
+ "intervals: {{{=Const [0]}}}}}\n"
+ "| | | | U \n"
+ "| | | | {{refProjection: scan_0, path: 'PathGet [b] PathIdentity []', "
+ "intervals: {{{=Const [1]}}}}}\n"
+ "| | | | }\n"
+ "| | | candidateIndexes: \n"
+ "| | scanParams: \n"
+ "| | {'a': evalTemp_0, 'b': evalTemp_1}\n"
+ "| | residualReqs: \n"
+ "| | {\n"
+ "| | {{refProjection: evalTemp_0, path: 'PathIdentity []', intervals: "
+ "{{{=Const [0]}}}, entryIndex: 0}}\n"
+ "| | U \n"
+ "| | {{refProjection: evalTemp_1, path: 'PathIdentity []', intervals: "
+ "{{{=Const [1]}}}, entryIndex: 1}}\n"
+ "| | }\n"
+ "Scan [coll, {scan_0}]\n",
+ optimized);
+}
+
+TEST(LogicalRewriter, DisjunctionConversionDedup) {
+
+ auto leaf0 = _get("a", _cmp("Eq", "0"_cint64));
+ auto leaf1 = _get("b", _cmp("Eq", "1"_cint64));
+ auto path = _composea(_composea(leaf0, leaf1), _composea(leaf0, leaf0));
+ ABT rootNode = NodeBuilder{}
+ .root("scan_0")
+ .filter(_evalf(path, "scan_0"_var))
+ .finish(_scan("scan_0", "coll"));
+
+ auto prefixId = PrefixId::createForTests();
+ auto phaseManager = makePhaseManager({OptPhase::MemoSubstitutionPhase},
+ prefixId,
+ {{{"coll", createScanDef({}, {})}}},
+ boost::none /*costModel*/,
+ DebugInfo::kDefaultForTests,
+ QueryHints{});
+
+ ABT optimized = rootNode;
+ phaseManager.optimize(optimized);
+
+ // We should see everything get reordered and deduped,
+ // so each of the leaf predicates appears once.
+ // TODO SERVER-73827 We should get 2 leaf predicates instead of 3 here.
+ ASSERT_EXPLAIN_V2_AUTO(
+ "Root [{scan_0}]\n"
+ "Sargable [Complete]\n"
+ "| | | | requirements: \n"
+ "| | | | {\n"
+ "| | | | {{refProjection: scan_0, path: 'PathGet [a] PathIdentity []', "
+ "intervals: {{{=Const [0]}}}}}\n"
+ "| | | | U \n"
+ "| | | | {{refProjection: scan_0, path: 'PathGet [a] PathIdentity []', "
+ "intervals: {{{=Const [0]}}}}}\n"
+ "| | | | U \n"
+ "| | | | {{refProjection: scan_0, path: 'PathGet [b] PathIdentity []', "
+ "intervals: {{{=Const [1]}}}}}\n"
+ "| | | | }\n"
+ "| | | candidateIndexes: \n"
+ "| | scanParams: \n"
+ "| | {'a': evalTemp_0, 'b': evalTemp_1}\n"
+ "| | residualReqs: \n"
+ "| | {\n"
+ "| | {{refProjection: evalTemp_0, path: 'PathIdentity []', intervals: "
+ "{{{=Const [0]}}}, entryIndex: 0}}\n"
+ "| | U \n"
+ "| | {{refProjection: evalTemp_0, path: 'PathIdentity []', intervals: "
+ "{{{=Const [0]}}}, entryIndex: 1}}\n"
+ "| | U \n"
+ "| | {{refProjection: evalTemp_1, path: 'PathIdentity []', intervals: "
+ "{{{=Const [1]}}}, entryIndex: 2}}\n"
+ "| | }\n"
+ "Scan [coll, {scan_0}]\n",
+ optimized);
+}
+
+TEST(PhysRewriter, LowerRequirementsWithTopLevelDisjunction) {
+ auto req =
+ PartialSchemaRequirement(boost::none,
+ _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))),
+ false /*perfOnly*/);
+
+ auto makeKey = [](std::string pathName) {
+ return PartialSchemaKey("ptest",
+ make<PathGet>(FieldNameType{pathName}, make<PathIdentity>()));
+ };
+
+ CEType scanGroupCE{10.0};
+ FieldProjectionMap fieldProjectionMap;
+ fieldProjectionMap._rootProjection = "ptest";
+ std::vector<SelectivityType> indexPredSels;
+
+ PhysPlanBuilder builder;
+ builder.make<PhysicalScanNode>(
+ scanGroupCE, fieldProjectionMap, "test" /* scanDefName */, false /* parallelScan */);
+
+ ResidualRequirementsWithOptionalCE::Builder residReqsBuilder;
+ residReqsBuilder.pushDisj()
+ .pushConj()
+ .atom({makeKey("a"), req, CEType{2.0}})
+ .atom({makeKey("b"), req, CEType{3.0}})
+ .pop()
+ .pushConj()
+ .atom({makeKey("c"), req, CEType{5.0}})
+ .atom({makeKey("d"), req, CEType{4.0}})
+ .pop();
+ auto residReqs = residReqsBuilder.finish().get();
+ lowerPartialSchemaRequirements(
+ scanGroupCE, indexPredSels, residReqs, defaultConvertPathToInterval, builder);
+
+ ASSERT_EXPLAIN_V2_AUTO(
+ "Filter []\n"
+ "| BinaryOp [Or]\n"
+ "| | BinaryOp [And]\n"
+ "| | | EvalFilter []\n"
+ "| | | | Variable [ptest]\n"
+ "| | | PathGet [c]\n"
+ "| | | PathCompare [Eq]\n"
+ "| | | Const [1]\n"
+ "| | EvalFilter []\n"
+ "| | | Variable [ptest]\n"
+ "| | PathGet [d]\n"
+ "| | PathCompare [Eq]\n"
+ "| | Const [1]\n"
+ "| BinaryOp [And]\n"
+ "| | EvalFilter []\n"
+ "| | | Variable [ptest]\n"
+ "| | PathGet [b]\n"
+ "| | PathCompare [Eq]\n"
+ "| | Const [1]\n"
+ "| EvalFilter []\n"
+ "| | Variable [ptest]\n"
+ "| PathGet [a]\n"
+ "| PathCompare [Eq]\n"
+ "| Const [1]\n"
+ "PhysicalScan [{'<root>': ptest}, test]\n",
+ builder._node);
+}
+
+TEST(PhysRewriter, OptimizeSargableNodeWithTopLevelDisjunction) {
+ auto req =
+ PartialSchemaRequirement(boost::none,
+ _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))),
+ false /*perfOnly*/);
+
+ auto makeKey = [](std::string pathName) {
+ return PartialSchemaKey("ptest",
+ make<PathGet>(FieldNameType{pathName}, make<PathIdentity>()));
+ };
+
+ // Create three SargableNodes with top-level disjunctions.
+ PSRExpr::Builder builder;
+ builder.pushDisj()
+ .pushConj()
+ .atom({makeKey("a"), req})
+ .atom({makeKey("b"), req})
+ .pop()
+ .pushConj()
+ .atom({makeKey("c"), req})
+ .atom({makeKey("d"), req})
+ .pop();
+ auto reqs1 = PartialSchemaRequirements(builder.finish().get());
+
+ builder.pushDisj()
+ .pushConj()
+ .atom({makeKey("e"), req})
+ .pop()
+ .pushConj()
+ .atom({makeKey("f"), req})
+ .pop();
+ auto reqs2 = PartialSchemaRequirements(builder.finish().get());
+
+ builder.pushDisj().pushConj().atom({makeKey("g"), req}).pop();
+ auto reqs3 = PartialSchemaRequirements(builder.finish().get());
+
+ // During logical optimization, the SargableNodes not directly above the Scan will first be
+ // lowered to Filter nodes based on their requirements. The SargableNode immediately above the
+ // Scan will be lowered later based on its residual requirements.
+ ResidualRequirements::Builder residReqs;
+ residReqs.pushDisj()
+ .pushConj()
+ .atom({makeKey("a"), req, 0})
+ .atom({makeKey("b"), req, 1})
+ .pop()
+ .pushConj()
+ .atom({makeKey("c"), req, 2})
+ .atom({makeKey("d"), req, 3})
+ .pop();
+ ScanParams scanParams;
+ scanParams._residualRequirements = residReqs.finish();
+
+ ABT scanNode = make<ScanNode>("ptest", "test");
+ ABT sargableNode1 = make<SargableNode>(
+ reqs1, CandidateIndexes(), scanParams, IndexReqTarget::Index, std::move(scanNode));
+ ABT sargableNode2 = make<SargableNode>(
+ reqs2, CandidateIndexes(), boost::none, IndexReqTarget::Index, std::move(sargableNode1));
+ ABT sargableNode3 = make<SargableNode>(
+ reqs3, CandidateIndexes(), boost::none, IndexReqTarget::Index, std::move(sargableNode2));
+ ABT rootNode = make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"ptest"}},
+ std::move(sargableNode3));
+
+ // Show that the optimization of the SargableNode does not throw, and that all three
+ // SargableNodes are correctly lowered to FilterNodes.
+ auto prefixId = PrefixId::createForTests();
+ auto phaseManager = makePhaseManager(
+ {OptPhase::MemoSubstitutionPhase,
+ OptPhase::MemoExplorationPhase,
+ OptPhase::MemoImplementationPhase},
+ prefixId,
+ {{{"test",
+ createScanDef(
+ {},
+ {
+ // For now, verify that we do not get an indexed plan even when there
+ // are indexes available on the queried fields.
+ {"ab",
+ IndexDefinition{{{makeIndexPath("a"), CollationOp::Ascending},
+ {makeIndexPath("b"), CollationOp::Ascending}},
+ false /*isMultiKey*/,
+ {DistributionType::Centralized},
+ {}}},
+ {"cd",
+ IndexDefinition{{{makeIndexPath("c"), CollationOp::Ascending},
+ {makeIndexPath("d"), CollationOp::Ascending}},
+ false /*isMultiKey*/,
+ {DistributionType::Centralized},
+ {}}},
+ {"e", makeIndexDefinition("e", CollationOp::Ascending, false /*isMultiKey*/)},
+ {"f", makeIndexDefinition("f", CollationOp::Ascending, false /*isMultiKey*/)},
+ {"g", makeIndexDefinition("g", CollationOp::Ascending, false /*isMultiKey*/)},
+ })}}},
+ boost::none /*costModel*/,
+ DebugInfo::kDefaultForTests);
+ phaseManager.optimize(rootNode);
+
+ ASSERT_EXPLAIN_V2Compact_AUTO(
+ "Root [{ptest}]\n"
+ "Filter []\n"
+ "| EvalFilter []\n"
+ "| | Variable [ptest]\n"
+ "| PathGet [g] PathCompare [Eq] Const [1]\n"
+ "Filter []\n"
+ "| BinaryOp [Or]\n"
+ "| | EvalFilter []\n"
+ "| | | Variable [ptest]\n"
+ "| | PathGet [f] PathCompare [Eq] Const [1]\n"
+ "| EvalFilter []\n"
+ "| | Variable [ptest]\n"
+ "| PathGet [e] PathCompare [Eq] Const [1]\n"
+ "Filter []\n"
+ "| BinaryOp [Or]\n"
+ "| | BinaryOp [And]\n"
+ "| | | EvalFilter []\n"
+ "| | | | Variable [ptest]\n"
+ "| | | PathGet [d] PathCompare [Eq] Const [1]\n"
+ "| | EvalFilter []\n"
+ "| | | Variable [ptest]\n"
+ "| | PathGet [c] PathCompare [Eq] Const [1]\n"
+ "| BinaryOp [And]\n"
+ "| | EvalFilter []\n"
+ "| | | Variable [ptest]\n"
+ "| | PathGet [b] PathCompare [Eq] Const [1]\n"
+ "| EvalFilter []\n"
+ "| | Variable [ptest]\n"
+ "| PathGet [a] PathCompare [Eq] Const [1]\n"
+ "PhysicalScan [{'<root>': ptest}, test]\n",
+ rootNode);
+}
+
+} // namespace
+} // namespace mongo::optimizer
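As a reading aid for the explain blocks in these tests, the glyphs in the Sargable requirements output correspond to the DNF the builders construct (mapping inferred from the expected strings above):

    // U  -- separates disjuncts (OR of conjunction blocks)
    // ^  -- separates entries within one conjunct (AND)
    // e.g. MakeSargableNodeWithTopLevelDisjunction encodes:
    //      (a = Const [1] AND b = Const [1]) OR (c = Const [1] AND d = Const [1])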
diff --git a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
index a66cb8b81a3..3947ee71d87 100644
--- a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
@@ -38,6 +38,7 @@
#include "mongo/db/query/optimizer/utils/unit_test_utils.h"
#include "mongo/unittest/unittest.h"
+using namespace mongo::optimizer::unit_test_abt_literals;
namespace mongo::optimizer {
namespace {
@@ -1555,7 +1556,6 @@ TEST(LogicalRewriter, NotPushdownUnderLambdaKeepOuterTraverse) {
}
TEST(LogicalRewriter, NotPushdownUnderLambdaFailsWithFreeVar) {
- using namespace mongo::optimizer::unit_test_abt_literals;
// When we eliminate a Not, we can't eliminate the Lambda [x] if it would leave free
// occurrences of 'x'.
@@ -2027,61 +2027,5 @@ TEST(LogicalRewriter, EmptyArrayIndexBounds) {
rootNode);
}
-TEST(LogicalRewriter, MakeSargableNodeWithTopLevelDisjunction) {
- using namespace unit_test_abt_literals;
-
- // Hand-build SargableNode with top-level disjunction.
- auto req = PartialSchemaRequirement(
- boost::none, _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))), false);
-
- auto makeKey = [](std::string pathName) {
- return PartialSchemaKey("ptest",
- make<PathGet>(FieldNameType{pathName}, make<PathIdentity>()));
- };
- PSRExpr::Builder builder;
- builder.pushDisj()
- .pushConj()
- .atom({makeKey("a"), req})
- .atom({makeKey("b"), req})
- .pop()
- .pushConj()
- .atom({makeKey("c"), req})
- .atom({makeKey("d"), req})
- .pop();
- auto reqs = PartialSchemaRequirements(builder.finish().get());
-
- ABT scanNode = make<ScanNode>("ptest", "test");
- ABT sargableNode = make<SargableNode>(
- reqs, CandidateIndexes(), boost::none, IndexReqTarget::Index, std::move(scanNode));
- ABT rootNode = make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"ptest"}},
- std::move(sargableNode));
- ASSERT_EXPLAIN_V2_AUTO(
- "Root [{ptest}]\n"
- "Sargable [Index]\n"
- "| | | requirements: \n"
- "| | | {\n"
- "| | | {\n"
- "| | | {refProjection: ptest, path: 'PathGet [a] PathIdentity []', "
- "intervals: {{{=Const [1]}}}}\n"
- "| | | ^ \n"
- "| | | {refProjection: ptest, path: 'PathGet [b] PathIdentity []', "
- "intervals: {{{=Const [1]}}}}\n"
- "| | | }\n"
- "| | | U \n"
- "| | | {\n"
- "| | | {refProjection: ptest, path: 'PathGet [c] PathIdentity []', "
- "intervals: {{{=Const [1]}}}}\n"
- "| | | ^ \n"
- "| | | {refProjection: ptest, path: 'PathGet [d] PathIdentity []', "
- "intervals: {{{=Const [1]}}}}\n"
- "| | | }\n"
- "| | | }\n"
- "| | candidateIndexes: \n"
- "Scan [test, {ptest}]\n",
- rootNode);
-
- // Show that hashing a top-level disjunction doesn't throw.
- ABTHashGenerator::generate(rootNode);
-}
} // namespace
} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/node.cpp b/src/mongo/db/query/optimizer/node.cpp
index 99b48fb0970..e4e8892119e 100644
--- a/src/mongo/db/query/optimizer/node.cpp
+++ b/src/mongo/db/query/optimizer/node.cpp
@@ -405,9 +405,12 @@ SargableNode::SargableNode(PartialSchemaRequirements reqMap,
tassert(6624085, "SargableNode requires at least one predicate", !_reqMap.isNoop());
tassert(
7447500, "SargableNode requirements should be in DNF", PSRExpr::isDNF(_reqMap.getRoot()));
- tassert(6624086,
- str::stream() << "SargableNode has too many predicates: " << _reqMap.numLeaves(),
- _reqMap.numLeaves() <= kMaxPartialSchemaReqs);
+ if (const size_t numLeaves = PSRExpr::numLeaves(_reqMap.getRoot());
+ numLeaves > kMaxPartialSchemaReqs) {
+ tasserted(6624086,
+ str::stream() << "SargableNode has too many predicates: " << numLeaves
+ << ". We allow at most " << kMaxPartialSchemaReqs);
+ }
auto bindings = createSargableBindings(_reqMap);
tassert(7410100,
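The node.cpp change keeps the same limit check but only builds the str::stream message on the failure path, presumably so the diagnostic string is not materialized on every SargableNode construction. A minimal sketch of the pattern with hypothetical names (countLeaves, kLimit):

    // Sketch: construct the message only when the assertion actually fires.
    if (const size_t numLeaves = countLeaves(); numLeaves > kLimit) {
        tasserted(6624086,
                  str::stream() << "too many predicates: " << numLeaves
                                << ". We allow at most " << kLimit);
    }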
diff --git a/src/mongo/db/query/optimizer/partial_schema_requirements.cpp b/src/mongo/db/query/optimizer/partial_schema_requirements.cpp
index 0659675ccee..40b54d4d61d 100644
--- a/src/mongo/db/query/optimizer/partial_schema_requirements.cpp
+++ b/src/mongo/db/query/optimizer/partial_schema_requirements.cpp
@@ -64,26 +64,12 @@ private:
}
};
-// Make a BoolExpr representing a conjunction of the entries. It will be an OR of a single AND.
-PSRExpr::Node makePSRExpr(std::vector<PartialSchemaEntry> entries) {
- PSRExpr::Builder b;
- b.pushDisj().pushConj();
- for (auto& entry : entries) {
- b.atom(std::move(entry));
- }
-
- auto res = b.finish();
- tassert(7016402, "PartialSchemaRequirements could not be constructed", res.has_value());
- return res.get();
-}
-
// A no-op entry has a default key and a requirement that is fully open and does not bind.
PartialSchemaEntry makeNoopPartialSchemaEntry() {
return {PartialSchemaKey(),
PartialSchemaRequirement(
boost::none /*boundProjectionName*/,
- IntervalReqExpr::makeSingularDNF(IntervalRequirement(
- BoundRequirement::makeMinusInf(), BoundRequirement::makePlusInf())),
+ IntervalReqExpr::makeSingularDNF(IntervalRequirement{/*fully open*/}),
false /*isPerfOnly*/)};
}
} // namespace
@@ -92,9 +78,6 @@ void PartialSchemaRequirements::normalize() {
PSRNormalizeTransporter{}.normalize(_expr);
}
-PartialSchemaRequirements::PartialSchemaRequirements(std::vector<Entry> entries)
- : PartialSchemaRequirements(makePSRExpr(entries)) {}
-
PartialSchemaRequirements::PartialSchemaRequirements(PSRExpr::Node requirements)
: _expr(std::move(requirements)) {
tassert(7016403,
@@ -113,7 +96,7 @@ bool PartialSchemaRequirements::operator==(const PartialSchemaRequirements& othe
bool PartialSchemaRequirements::isNoop() const {
// A PartialSchemaRequirements is a no-op if it has exactly zero predicates/projections...
- auto numPreds = numLeaves();
+ const size_t numPreds = PSRExpr::numLeaves(getRoot());
if (numPreds == 0) {
return true;
} else if (numPreds > 1) {
@@ -135,14 +118,6 @@ bool PartialSchemaRequirements::isNoop() const {
return reqIsNoop;
}
-size_t PartialSchemaRequirements::numLeaves() const {
- return PSRExpr::numLeaves(_expr);
-}
-
-size_t PartialSchemaRequirements::numConjuncts() const {
- return numLeaves();
-}
-
boost::optional<ProjectionName> PartialSchemaRequirements::findProjection(
const PartialSchemaKey& key) const {
tassert(7453908,
@@ -177,6 +152,8 @@ PartialSchemaRequirements::findFirstConjunct(const PartialSchemaKey& key) const
void PartialSchemaRequirements::add(PartialSchemaKey key, PartialSchemaRequirement req) {
tassert(7016406, "Expected a PartialSchemaRequirements in DNF form", PSRExpr::isDNF(_expr));
+ // TODO SERVER-69026 Consider applying the distributive law.
+ tassert(7453912, "Expected a singleton disjunction", PSRExpr::isSingletonDisjunction(_expr));
// Add an entry to the first conjunction
PSRExpr::visitDisjuncts(_expr, [&](PSRExpr::Node& disjunct, const size_t i) {
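On the new tassert in add(): conjunctively combining a requirement R with a DNF that has more than one disjunct would require the distributive law, duplicating R into every disjunct, which is exactly what the TODO defers. An illustrative sketch of that expansion using the same PSRExpr::Builder API as the tests in this patch (c1a, c1b, c2a, c2b, r are placeholder PartialSchemaEntry values; this is not what add() does today, it tasserts a singleton disjunction instead):

    // (C1 OR C2) AND R  ==  (C1 AND R) OR (C2 AND R)
    PSRExpr::Builder b;
    b.pushDisj()
        .pushConj().atom(c1a).atom(c1b).atom(r).pop()   // first disjunct, with R appended
        .pushConj().atom(c2a).atom(c2b).atom(r).pop();  // second disjunct, with R appended
    auto distributed = b.finish();  // boost::optional holding the expanded DNF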
diff --git a/src/mongo/db/query/optimizer/partial_schema_requirements.h b/src/mongo/db/query/optimizer/partial_schema_requirements.h
index fb84db887fd..47feba141e8 100644
--- a/src/mongo/db/query/optimizer/partial_schema_requirements.h
+++ b/src/mongo/db/query/optimizer/partial_schema_requirements.h
@@ -123,11 +123,6 @@ public:
PartialSchemaRequirements(PSRExpr::Node requirements);
- // TODO SERVER-74539: remove these contructors.
- PartialSchemaRequirements(std::vector<Entry>);
- PartialSchemaRequirements(std::initializer_list<Entry> entries)
- : PartialSchemaRequirements(std::vector<Entry>(entries)) {}
-
bool operator==(const PartialSchemaRequirements& other) const;
/**
@@ -137,17 +132,6 @@ public:
bool isNoop() const;
/**
- * Return the number of PartialSchemaEntries.
- */
- size_t numLeaves() const;
-
- /**
- * Return the number of Disjunctions under a top-level Conjunction.
- * TODO SERVER-69026 Remove or clarify this method.
- */
- size_t numConjuncts() const;
-
- /**
* Return the bound projection name corresponding to the first conjunct matching the given key.
* Assert on non-DNF requirements.
*/
@@ -187,8 +171,14 @@ public:
}
/**
- * Add an entry to the first AND under a top-level OR. Asserts on non-DNF requirements.
- * TODO SERVER-74539, remove or clarify this method.
+ * Conjunctively combine 'this' with another PartialSchemaRequirement.
+ * Asserts that 'this' is in DNF.
+ *
+ * For now, we assert that we have only one disjunct. This means we avoid applying
+ * the distributive law, which would duplicate the new requirement into each disjunct.
+ *
+ * TODO SERVER-69026 Consider applying the distributive law to allow contained-OR in
+ * SargableNode.
*/
void add(PartialSchemaKey, PartialSchemaRequirement);
diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
index 8057ae362b7..18cc3052610 100644
--- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
@@ -5398,190 +5398,5 @@ TEST(PhysRewriter, ExtractAllPlans) {
"| minKey], Const [1 | maxKey]]}]\n",
getExplainForPlan(2));
}
-
-TEST(PhysRewriter, LowerRequirementsWithTopLevelDisjunction) {
- auto req =
- PartialSchemaRequirement(boost::none,
- _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))),
- false /*perfOnly*/);
-
- auto makeKey = [](std::string pathName) {
- return PartialSchemaKey("ptest",
- make<PathGet>(FieldNameType{pathName}, make<PathIdentity>()));
- };
-
- CEType scanGroupCE{10.0};
- FieldProjectionMap fieldProjectionMap;
- fieldProjectionMap._rootProjection = "ptest";
- std::vector<SelectivityType> indexPredSels;
-
- PhysPlanBuilder builder;
- builder.make<PhysicalScanNode>(
- scanGroupCE, fieldProjectionMap, "test" /* scanDefName */, false /* parallelScan */);
-
- ResidualRequirementsWithOptionalCE::Builder residReqsBuilder;
- residReqsBuilder.pushDisj()
- .pushConj()
- .atom({makeKey("a"), req, CEType{2.0}})
- .atom({makeKey("b"), req, CEType{3.0}})
- .pop()
- .pushConj()
- .atom({makeKey("c"), req, CEType{5.0}})
- .atom({makeKey("d"), req, CEType{4.0}})
- .pop();
- auto residReqs = residReqsBuilder.finish().get();
- lowerPartialSchemaRequirements(
- scanGroupCE, indexPredSels, residReqs, defaultConvertPathToInterval, builder);
-
- ASSERT_EXPLAIN_V2_AUTO(
- "Filter []\n"
- "| BinaryOp [Or]\n"
- "| | BinaryOp [And]\n"
- "| | | EvalFilter []\n"
- "| | | | Variable [ptest]\n"
- "| | | PathGet [c]\n"
- "| | | PathCompare [Eq]\n"
- "| | | Const [1]\n"
- "| | EvalFilter []\n"
- "| | | Variable [ptest]\n"
- "| | PathGet [d]\n"
- "| | PathCompare [Eq]\n"
- "| | Const [1]\n"
- "| BinaryOp [And]\n"
- "| | EvalFilter []\n"
- "| | | Variable [ptest]\n"
- "| | PathGet [b]\n"
- "| | PathCompare [Eq]\n"
- "| | Const [1]\n"
- "| EvalFilter []\n"
- "| | Variable [ptest]\n"
- "| PathGet [a]\n"
- "| PathCompare [Eq]\n"
- "| Const [1]\n"
- "PhysicalScan [{'<root>': ptest}, test]\n",
- builder._node);
-}
-
-TEST(PhysRewriter, OptimizeSargableNodeWithTopLevelDisjunction) {
- auto req =
- PartialSchemaRequirement(boost::none,
- _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))),
- false /*perfOnly*/);
-
- auto makeKey = [](std::string pathName) {
- return PartialSchemaKey("ptest",
- make<PathGet>(FieldNameType{pathName}, make<PathIdentity>()));
- };
-
- // Create three SargableNodes with top-level disjunctions.
- PSRExpr::Builder builder;
- builder.pushDisj()
- .pushConj()
- .atom({makeKey("a"), req})
- .atom({makeKey("b"), req})
- .pop()
- .pushConj()
- .atom({makeKey("c"), req})
- .atom({makeKey("d"), req})
- .pop();
- auto reqs1 = PartialSchemaRequirements(builder.finish().get());
-
- builder.pushDisj()
- .pushConj()
- .atom({makeKey("e"), req})
- .pop()
- .pushConj()
- .atom({makeKey("f"), req})
- .pop();
- auto reqs2 = PartialSchemaRequirements(builder.finish().get());
-
- builder.pushDisj().pushConj().atom({makeKey("g"), req}).pop();
- auto reqs3 = PartialSchemaRequirements(builder.finish().get());
-
- // During logical optimization, the SargableNodes not directly above the Scan will first be
- // lowered to Filter nodes based on their requirements. The SargableNode immediately above the
- // Scan will be lowered later based on its residual requirements.
- ResidualRequirements::Builder residReqs;
- residReqs.pushDisj()
- .pushConj()
- .atom({makeKey("a"), req, 0})
- .atom({makeKey("b"), req, 1})
- .pop()
- .pushConj()
- .atom({makeKey("c"), req, 2})
- .atom({makeKey("d"), req, 3})
- .pop();
- ScanParams scanParams;
- scanParams._residualRequirements = residReqs.finish();
-
- ABT scanNode = make<ScanNode>("ptest", "test");
- ABT sargableNode1 = make<SargableNode>(
- reqs1, CandidateIndexes(), scanParams, IndexReqTarget::Index, std::move(scanNode));
- ABT sargableNode2 = make<SargableNode>(
- reqs2, CandidateIndexes(), boost::none, IndexReqTarget::Index, std::move(sargableNode1));
- ABT sargableNode3 = make<SargableNode>(
- reqs3, CandidateIndexes(), boost::none, IndexReqTarget::Index, std::move(sargableNode2));
- ABT rootNode = make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"ptest"}},
- std::move(sargableNode3));
-
- // Show that the optimization of the SargableNode does not throw, and that all three
- // SargableNodes are correctly lowered to FilterNodes.
- auto prefixId = PrefixId::createForTests();
- auto phaseManager = makePhaseManager({OptPhase::MemoSubstitutionPhase,
- OptPhase::MemoExplorationPhase,
- OptPhase::MemoImplementationPhase},
- prefixId,
- {{{"test", ScanDefinition()}}},
- boost::none /*costModel*/,
- DebugInfo::kDefaultForTests);
- phaseManager.optimize(rootNode);
-
- ASSERT_EXPLAIN_V2_AUTO(
- "Root [{ptest}]\n"
- "Filter []\n"
- "| EvalFilter []\n"
- "| | Variable [ptest]\n"
- "| PathGet [g]\n"
- "| PathCompare [Eq]\n"
- "| Const [1]\n"
- "Filter []\n"
- "| BinaryOp [Or]\n"
- "| | EvalFilter []\n"
- "| | | Variable [ptest]\n"
- "| | PathGet [f]\n"
- "| | PathCompare [Eq]\n"
- "| | Const [1]\n"
- "| EvalFilter []\n"
- "| | Variable [ptest]\n"
- "| PathGet [e]\n"
- "| PathCompare [Eq]\n"
- "| Const [1]\n"
- "Filter []\n"
- "| BinaryOp [Or]\n"
- "| | BinaryOp [And]\n"
- "| | | EvalFilter []\n"
- "| | | | Variable [ptest]\n"
- "| | | PathGet [d]\n"
- "| | | PathCompare [Eq]\n"
- "| | | Const [1]\n"
- "| | EvalFilter []\n"
- "| | | Variable [ptest]\n"
- "| | PathGet [c]\n"
- "| | PathCompare [Eq]\n"
- "| | Const [1]\n"
- "| BinaryOp [And]\n"
- "| | EvalFilter []\n"
- "| | | Variable [ptest]\n"
- "| | PathGet [b]\n"
- "| | PathCompare [Eq]\n"
- "| | Const [1]\n"
- "| EvalFilter []\n"
- "| | Variable [ptest]\n"
- "| PathGet [a]\n"
- "| PathCompare [Eq]\n"
- "| Const [1]\n"
- "PhysicalScan [{'<root>': ptest}, test]\n",
- rootNode);
-}
} // namespace
} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp
index be43d300c6e..145784ae905 100644
--- a/src/mongo/db/query/optimizer/utils/utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/utils.cpp
@@ -278,39 +278,43 @@ public:
leftResult->_hasIntersected = true;
return leftResult;
}
- // Additive composition: we never have projections in this case; only predicates.
- for (const auto& [key, req] : leftReqMap.conjuncts()) {
- tassert(7155021,
- "Unexpected binding in ComposeA in PartialSchemaReqConverter",
- !req.getBoundProjectionName());
- }
- for (const auto& [key, req] : rightReqMap.conjuncts()) {
- tassert(7155022,
- "Unexpected binding in ComposeA in PartialSchemaReqConverter",
- !req.getBoundProjectionName());
- }
+ // From this point on we only handle additive composition.
+
+ // Check if the left and right requirements are all or none perf-only.
+ size_t perfOnlyCount = 0;
+ for (const auto* reqs : {&leftReqMap, &rightReqMap}) {
+ PSRExpr::visitAnyShape(reqs->getRoot(), [&](const PartialSchemaEntry& e) {
+ const auto& [key, req] = e;
+ // Additive composition should only have predicates; no projections.
+ tassert(7155021,
+ "Unexpected binding in ComposeA in PartialSchemaReqConverter",
+ !req.getBoundProjectionName());
- {
- // Check if the left and right requirements are all or none perf-only.
- size_t perfOnlyCount = 0;
- for (const auto& [key, req] : leftReqMap.conjuncts()) {
- if (req.getIsPerfOnly()) {
- perfOnlyCount++;
- }
- }
- for (const auto& [key, req] : rightReqMap.conjuncts()) {
if (req.getIsPerfOnly()) {
perfOnlyCount++;
}
- }
- if (perfOnlyCount != 0 &&
- perfOnlyCount != leftReqMap.numLeaves() + rightReqMap.numLeaves()) {
- // For now allow only predicates with the same perf-only flag.
- return {};
- }
+ });
+ }
+ if (perfOnlyCount != 0 &&
+ perfOnlyCount !=
+ PSRExpr::numLeaves(leftReqMap.getRoot()) +
+ PSRExpr::numLeaves(rightReqMap.getRoot())) {
+ // For now allow only predicates with the same perf-only flag.
+ return {};
+ }
+
+ if (ResultType sameFieldDisj = createSameFieldDisjunction(leftResult, rightResult)) {
+ return sameFieldDisj;
}
- return createSameFieldDisjunction(leftResult, rightResult);
+ // Handle a general disjunction.
+ auto result = PartialSchemaReqConversion{
+ unionPartialSchemaReq(std::move(leftReqMap), std::move(rightReqMap))};
+ if (leftResult->_retainPredicate || rightResult->_retainPredicate) {
+ // If either argument is an over-approximation, then so is the result.
+ result._retainPredicate = true;
+ }
+ return result;
}
/**
@@ -319,12 +323,21 @@ public:
*
* When this function returns a nonempty optional, it may modify or move from the arguments.
* When it returns boost::none the arguments are unchanged.
+ *
+ * TODO SERVER-73827 Instead of handling these special cases, just construct a disjunction and
+ * then simplify; and get rid of this function.
*/
static ResultType createSameFieldDisjunction(ResultType& leftResult, ResultType& rightResult) {
auto& leftReqMap = leftResult->_reqMap;
auto& rightReqMap = rightResult->_reqMap;
+ if (!PSRExpr::isSingletonDisjunction(leftReqMap.getRoot()) ||
+ !PSRExpr::isSingletonDisjunction(rightReqMap.getRoot())) {
+ return {};
+ }
+
auto leftEntries = leftReqMap.conjuncts();
auto rightEntries = rightReqMap.conjuncts();
+
auto leftEntry = leftEntries.begin();
auto rightEntry = rightEntries.begin();
auto& [leftKey, leftReq] = *leftEntry;
@@ -369,7 +382,8 @@ public:
}
// Left and right don't all use the same key.
- if (leftReqMap.numLeaves() != 1 || rightReqMap.numLeaves() != 1) {
+ if (PSRExpr::numLeaves(leftReqMap.getRoot()) != 1 ||
+ PSRExpr::numLeaves(rightReqMap.getRoot()) != 1) {
return {};
}
// Left and right don't all use the same key, but they both have exactly 1 entry.
@@ -483,19 +497,28 @@ public:
auto intervalExpr = IntervalReqExpr::makeSingularDNF(IntervalRequirement{
{true /*inclusive*/, Constant::null()}, {true /*inclusive*/, Constant::null()}});
- return {{PartialSchemaRequirements{
- {PartialSchemaKey{make<PathIdentity>()},
- PartialSchemaRequirement{boost::none /*boundProjectionName*/,
- std::move(intervalExpr),
- false /*isPerfOnly*/}}}}};
+ return {{PartialSchemaRequirements{PSRExpr::makeSingularDNF(
+ PartialSchemaKey{make<PathIdentity>()},
+ PartialSchemaRequirement{boost::none /*boundProjectionName*/,
+ std::move(intervalExpr),
+ false /*isPerfOnly*/})}}};
}
return handleComposition<false /*isMultiplicative*/>(std::move(leftResult),
std::move(rightResult));
}
+ /**
+ * Prepend a Get or Traverse to each Atom in the argument reqMap.
+ *
+ * 'n' specifies a single node to prepend. The child of 'n' is ignored. For example
+ * if 'n' is 'Get [a] Get [b] Id' then this function prepends 'Get [a]' to 'inputResult'.
+ *
+ * Only considers atoms with an empty input binding. Atoms with a nonempty input binding are
+ * ignored.
+ */
template <class T>
- static ResultType handleGetAndTraverse(const ABT& n, ResultType inputResult) {
+ static ResultType prependGetOrTraverse(const ABT& n, ResultType inputResult) {
if (!inputResult) {
return {};
}
@@ -503,15 +526,7 @@ public:
return {};
}
- // New map has keys with appended paths.
- PSRExpr::Builder newReqs;
- newReqs.pushDisj().pushConj();
-
- for (const auto& entry : inputResult->_reqMap.conjuncts()) {
- if (entry.first._projectionName) {
- return {};
- }
-
+ PSRExpr::visitAnyShape(inputResult->_reqMap.getRoot(), [&](PartialSchemaEntry& entry) {
ABT path = entry.first._path;
// Updated key path to be now rooted at n, with existing key path as child.
@@ -519,29 +534,33 @@ public:
std::swap(appendedPath.cast<T>()->getPath(), path);
std::swap(path, appendedPath);
- newReqs.atom(PartialSchemaKey{std::move(path)}, std::move(entry.second));
- }
-
- inputResult->_reqMap = std::move(*newReqs.finish());
+ entry.first._path = path;
+ });
return inputResult;
}
ResultType transport(const ABT& n, const PathGet& pathGet, ResultType inputResult) {
- return handleGetAndTraverse<PathGet>(n, std::move(inputResult));
+ return prependGetOrTraverse<PathGet>(n, std::move(inputResult));
}
ResultType transport(const ABT& n, const PathTraverse& pathTraverse, ResultType inputResult) {
if (!inputResult) {
return {};
}
- if (inputResult->_reqMap.numConjuncts() > 1) {
- // More than one requirement means we are handling a conjunction inside a traverse.
+
+ if (!PSRExpr::isSingularDNF(inputResult->_reqMap.getRoot())) {
+ // More than one requirement means we may have a conjunction inside a traverse.
// We can change it to a traverse inside a conjunction, but that's an
// over-approximation, so we have to keep the original predicate.
inputResult->_retainPredicate = true;
+
+ // Note that we could improve this by pushing traverse through disjunction without loss
+ // of precision. 'Traverse (ComposeA X Y) == ComposeA (Traverse X) (Traverse Y)' because
+ // Traverse is a disjunction over array elements, so it's ok to re-associate the
+ // disjunctions.
}
- auto result = handleGetAndTraverse<PathTraverse>(n, std::move(inputResult));
+ auto result = prependGetOrTraverse<PathTraverse>(n, std::move(inputResult));
if (result) {
result->_hasTraversed = true;
}
@@ -585,10 +604,10 @@ public:
}
return {{PartialSchemaRequirements{
- {PartialSchemaKey{make<PathIdentity>()},
- PartialSchemaRequirement{boost::none /*boundProjectionName*/,
- std::move(*unionedInterval),
- false /*isPerfOnly*/}}}}};
+ PSRExpr::makeSingularDNF(PartialSchemaKey{make<PathIdentity>()},
+ PartialSchemaRequirement{boost::none /*boundProjectionName*/,
+ std::move(*unionedInterval),
+ false /*isPerfOnly*/})}}};
}
ResultType transport(const ABT& n, const PathCompare& pathCompare, ResultType inputResult) {
@@ -635,17 +654,18 @@ public:
auto intervalExpr = IntervalReqExpr::makeSingularDNF(IntervalRequirement{
{lowBoundInclusive, std::move(lowBound)}, {highBoundInclusive, std::move(highBound)}});
return {{PartialSchemaRequirements{
- {PartialSchemaKey{make<PathIdentity>()},
- PartialSchemaRequirement{boost::none /*boundProjectionName*/,
- std::move(intervalExpr),
- false /*isPerfOnly*/}}}}};
+ PSRExpr::makeSingularDNF(PartialSchemaKey{make<PathIdentity>()},
+ PartialSchemaRequirement{boost::none /*boundProjectionName*/,
+ std::move(intervalExpr),
+ false /*isPerfOnly*/})}}};
}
ResultType transport(const ABT& n, const PathIdentity& pathIdentity) {
- return {{PartialSchemaRequirements{{{n},
- {boost::none /*boundProjectionName*/,
- IntervalReqExpr::makeSingularDNF(),
- false /*isPerfOnly*/}}}}};
+ return {{PartialSchemaRequirements{
+ PSRExpr::makeSingularDNF(PartialSchemaKey{n},
+ PartialSchemaRequirement{boost::none /*boundProjectionName*/,
+ IntervalReqExpr::makeSingularDNF(),
+ false /*isPerfOnly*/})}}};
}
ResultType transport(const ABT& n, const Constant& c) {
@@ -666,11 +686,11 @@ public:
if (_pathToInterval) {
// If we have a path converter, attempt to convert directly into bounds.
if (auto conversion = _pathToInterval(n); conversion) {
- return {{PartialSchemaRequirements{
- {PartialSchemaKey{make<PathIdentity>()},
- PartialSchemaRequirement{boost::none /*boundProjectionName*/,
- std::move(*conversion),
- false /*isPerfOnly*/}}}}};
+ return {{PartialSchemaRequirements{PSRExpr::makeSingularDNF(
+ PartialSchemaKey{make<PathIdentity>()},
+ PartialSchemaRequirement{boost::none /*boundProjectionName*/,
+ std::move(*conversion),
+ false /*isPerfOnly*/})}}};
}
}
@@ -959,7 +979,7 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs,
bool intersectPartialSchemaReq(PartialSchemaRequirements& target,
const PartialSchemaRequirements& source) {
- // TODO SERVER-74539 Implement intersect for top-level disjunctions.
+ // TODO SERVER-69026 Consider implementing intersect for top-level disjunctions.
if (!PSRExpr::isSingletonDisjunction(target.getRoot()) ||
!PSRExpr::isSingletonDisjunction(source.getRoot())) {
return false;
@@ -974,6 +994,22 @@ bool intersectPartialSchemaReq(PartialSchemaRequirements& target,
return true;
}
+PartialSchemaRequirements unionPartialSchemaReq(PartialSchemaRequirements&& left,
+ PartialSchemaRequirements&& right) {
+ tassert(
+ 7453911, "unionPartialSchemaReq assumes DNF", left.getRoot().is<PSRExpr::Disjunction>());
+ tassert(
+ 7453910, "unionPartialSchemaReq assumes DNF", right.getRoot().is<PSRExpr::Disjunction>());
+
+ PSRExpr::Disjunction& leftDisj = *left.getRoot().cast<PSRExpr::Disjunction>();
+ PSRExpr::Disjunction& rightDisj = *right.getRoot().cast<PSRExpr::Disjunction>();
+ auto resultNodes = std::move(leftDisj.nodes());
+ resultNodes.insert(resultNodes.end(),
+ std::make_move_iterator(rightDisj.nodes().begin()),
+ std::make_move_iterator(rightDisj.nodes().end()));
+ return PSRExpr::make<PSRExpr::Disjunction>(std::move(resultNodes));
+}
+
std::string encodeIndexKeyName(const size_t indexField) {
std::ostringstream os;
os << kIndexKeyPrefix << " " << indexField;
@@ -1249,8 +1285,9 @@ CandidateIndexes computeCandidateIndexes(PrefixId& prefixId,
const ScanDefinition& scanDef,
const QueryHints& hints,
const ConstFoldFn& constFold) {
- // TODO SERVER-69026 or SERVER-74539: Identify candidate indexes for reqs which are not
- // singleton disjunctions.
+ // A candidate index is one that can directly satisfy the SargableNode, without using
+ // any other indexes. Typically a disjunction would require unioning two different indexes,
+ // so we bail out if there's a nontrivial disjunction here.
if (!PSRExpr::isSingletonDisjunction(reqMap.getRoot())) {
return {};
}
@@ -1318,64 +1355,75 @@ boost::optional<ScanParams> computeScanParams(PrefixId& prefixId,
const PartialSchemaRequirements& reqMap,
const ProjectionName& rootProj) {
ScanParams result;
-
- ResidualRequirements::Builder residReqs;
- residReqs.pushDisj().pushConj();
-
auto& fieldProjMap = result._fieldProjectionMap;
+ ResidualRequirements::Builder residReqs;
- // Expect a DNF with one disjunct; bail out if we have a nontrivial disjunction.
- if (!PSRExpr::isSingletonDisjunction(reqMap.getRoot())) {
- return {};
- }
-
+ bool invalid = false;
size_t entryIndex = 0;
- for (const auto& [key, req] : reqMap.conjuncts()) {
- if (req.getIsPerfOnly()) {
- // Ignore perf only requirements.
- continue;
- }
- if (key._projectionName != rootProj) {
- // We are not sitting right above a ScanNode.
- return {};
- }
+ residReqs.pushDisj();
+ PSRExpr::visitDisjuncts(reqMap.getRoot(), [&](const PSRExpr::Node& disjunct, size_t) {
+ residReqs.pushConj();
+ PSRExpr::visitConjuncts(disjunct, [&](const PSRExpr::Node& conjunct, size_t) {
+ PSRExpr::visitAtom(conjunct, [&](const PartialSchemaEntry& e) {
+ if (invalid) {
+ // Short circuit if we're going to return {} anyway.
+ return;
+ }
+ const auto& [key, req] = e;
+ if (req.getIsPerfOnly()) {
+ // Ignore perf only requirements.
+ return;
+ }
+ if (key._projectionName != rootProj) {
+ // We are not sitting right above a ScanNode.
+ invalid = true;
+ return;
+ }
- if (auto pathGet = key._path.cast<PathGet>(); pathGet != nullptr) {
- const FieldNameType& fieldName = pathGet->name();
-
- // Extract a new requirements path with removed simple paths.
- // For example if we have a key Get "a" Traverse Compare = 0 we leave only
- // Traverse Compare 0.
- if (const auto& boundProjName = req.getBoundProjectionName();
- boundProjName && pathGet->getPath().is<PathIdentity>()) {
- const auto [it, insertedInFPM] =
- fieldProjMap._fieldProjections.emplace(fieldName, *boundProjName);
-
- if (!insertedInFPM) {
- residReqs.atom(PartialSchemaKey{it->second, make<PathIdentity>()},
- PartialSchemaRequirement{req.getBoundProjectionName(),
- req.getIntervals(),
- false /*isPerfOnly*/},
- entryIndex);
- } else if (!isIntervalReqFullyOpenDNF(req.getIntervals())) {
- residReqs.atom(PartialSchemaKey{*boundProjName, make<PathIdentity>()},
- PartialSchemaRequirement{boost::none /*boundProjectionName*/,
- req.getIntervals(),
- false /*isPerfOnly*/},
- entryIndex);
+ if (auto pathGet = key._path.cast<PathGet>(); pathGet != nullptr) {
+ const FieldNameType& fieldName = pathGet->name();
+
+ // Extract a new requirements path with removed simple paths.
+ // For example if we have a key Get "a" Traverse Compare = 0 we leave
+ // only Traverse Compare 0.
+ if (const auto& boundProjName = req.getBoundProjectionName();
+ boundProjName && pathGet->getPath().is<PathIdentity>()) {
+ const auto [it, insertedInFPM] =
+ fieldProjMap._fieldProjections.emplace(fieldName, *boundProjName);
+
+ if (!insertedInFPM) {
+ residReqs.atom(PartialSchemaKey{it->second, make<PathIdentity>()},
+ PartialSchemaRequirement{req.getBoundProjectionName(),
+ req.getIntervals(),
+ false /*isPerfOnly*/},
+ entryIndex);
+ } else if (!isIntervalReqFullyOpenDNF(req.getIntervals())) {
+ residReqs.atom(
+ PartialSchemaKey{*boundProjName, make<PathIdentity>()},
+ PartialSchemaRequirement{boost::none /*boundProjectionName*/,
+ req.getIntervals(),
+ false /*isPerfOnly*/},
+ entryIndex);
+ }
+ } else {
+ const ProjectionName& tempProjName =
+ getExistingOrTempProjForFieldName(prefixId, fieldName, fieldProjMap);
+ residReqs.atom(
+ PartialSchemaKey{tempProjName, pathGet->getPath()}, req, entryIndex);
+ }
+ } else {
+ // Move other conditions into the residual map.
+ fieldProjMap._rootProjection = rootProj;
+ residReqs.atom(key, req, entryIndex);
}
- } else {
- const ProjectionName& tempProjName =
- getExistingOrTempProjForFieldName(prefixId, fieldName, fieldProjMap);
- residReqs.atom(PartialSchemaKey{tempProjName, pathGet->getPath()}, req, entryIndex);
- }
- } else {
- // Move other conditions into the residual map.
- fieldProjMap._rootProjection = rootProj;
- residReqs.atom(key, req, entryIndex);
- }
- entryIndex++;
+ entryIndex++;
+ });
+ });
+ residReqs.pop();
+ });
+ if (invalid) {
+ return {};
}
result._residualRequirements = residReqs.finish();
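A minimal usage sketch for the new unionPartialSchemaReq helper declared in utils.h below: both inputs must already be in DNF (the function tasserts this), and the result simply concatenates their disjunct lists. The keys and requirements here are placeholders:

    // Two singleton-disjunction DNFs...
    PartialSchemaRequirements left{PSRExpr::makeSingularDNF(keyA, reqA)};
    PartialSchemaRequirements right{PSRExpr::makeSingularDNF(keyB, reqB)};
    // ...unioned into a two-disjunct DNF: {keyA reqA} U {keyB reqB}.
    PartialSchemaRequirements combined =
        unionPartialSchemaReq(std::move(left), std::move(right));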
diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h
index 8b72a5f2c66..30b01ba9e9b 100644
--- a/src/mongo/db/query/optimizer/utils/utils.h
+++ b/src/mongo/db/query/optimizer/utils/utils.h
@@ -307,6 +307,9 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs,
bool intersectPartialSchemaReq(PartialSchemaRequirements& target,
const PartialSchemaRequirements& source);
+PartialSchemaRequirements unionPartialSchemaReq(PartialSchemaRequirements&& left,
+ PartialSchemaRequirements&& right);
+
/**
* Encode an index of an index field as a field name in order to use with a FieldProjectionMap.