author     David Percy <david.percy@mongodb.com>             2023-03-14 16:00:26 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2023-03-30 21:53:55 +0000
commit     187851fdfda231e63a13b1189a8f8566c57e701a (patch)
tree       856363f5466fd8167a2af8262b02f1b82122dbd6 /src/mongo/db/query
parent     6afa8dd2bb3435e3632375a545d8a2c76a1cd620 (diff)
SERVER-74539 Enable disjunction in PSR conversion
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r--  src/mongo/db/query/ce/test_utils.h                                    2
-rw-r--r--  src/mongo/db/query/optimizer/SConscript                               1
-rw-r--r--  src/mongo/db/query/optimizer/bool_expression.h                        4
-rw-r--r--  src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp            8
-rw-r--r--  src/mongo/db/query/optimizer/index_union_optimizer_test.cpp         594
-rw-r--r--  src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp     58
-rw-r--r--  src/mongo/db/query/optimizer/node.cpp                                 9
-rw-r--r--  src/mongo/db/query/optimizer/partial_schema_requirements.cpp         31
-rw-r--r--  src/mongo/db/query/optimizer/partial_schema_requirements.h           26
-rw-r--r--  src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp   185
-rw-r--r--  src/mongo/db/query/optimizer/utils/utils.cpp                         294
-rw-r--r--  src/mongo/db/query/optimizer/utils/utils.h                             3
12 files changed, 797 insertions, 418 deletions
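
At a glance, the change lets PartialSchemaRequirements carry a true top-level disjunction (a DNF with more than one disjunct) rather than a single conjunction. The following condensed sketch is adapted from the new index_union_optimizer_test.cpp in this diff; it assumes the optimizer test headers and the _disj/_conj/_interval ABT-literal helpers are in scope, and is only an illustration of the builder pattern the tests use.

    // Build requirements equivalent to (a = 1 AND b = 1) OR (c = 1 AND d = 1).
    auto req = PartialSchemaRequirement(
        boost::none /*boundProjectionName*/,
        _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))),
        false /*isPerfOnly*/);

    auto makeKey = [](std::string pathName) {
        return PartialSchemaKey(
            "ptest", make<PathGet>(FieldNameType{pathName}, make<PathIdentity>()));
    };

    // PSRExpr::Builder produces the DNF directly: one pushDisj(), then one pushConj()
    // per disjunct, each holding its atoms.
    PSRExpr::Builder builder;
    builder.pushDisj()
        .pushConj().atom({makeKey("a"), req}).atom({makeKey("b"), req}).pop()
        .pushConj().atom({makeKey("c"), req}).atom({makeKey("d"), req}).pop();
    auto reqs = PartialSchemaRequirements(builder.finish().get());

The tests below exercise both directions: converting a Filter with a top-level $or into such a SargableNode during the substitution phase, and lowering the disjunctive residual requirements back into Filter nodes with BinaryOp [Or].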
diff --git a/src/mongo/db/query/ce/test_utils.h b/src/mongo/db/query/ce/test_utils.h index a894be82783..3a9c7f9c78e 100644 --- a/src/mongo/db/query/ce/test_utils.h +++ b/src/mongo/db/query/ce/test_utils.h @@ -116,7 +116,7 @@ bool isSargableNode(const ABT& n) { // for a SargableNode with a specific number of predicates. For tests, we only care about // verifying the cardinality of that one. if (auto* sargable = n.cast<optimizer::SargableNode>()) { - return sargable->getReqMap().numLeaves() == NumReq; + return PSRExpr::numLeaves(sargable->getReqMap().getRoot()) == NumReq; } return false; } diff --git a/src/mongo/db/query/optimizer/SConscript b/src/mongo/db/query/optimizer/SConscript index f9090848d4f..2abdccf1f04 100644 --- a/src/mongo/db/query/optimizer/SConscript +++ b/src/mongo/db/query/optimizer/SConscript @@ -130,6 +130,7 @@ env.CppUnitTest( target='optimizer_test', source=[ "bool_expression_test.cpp", + "index_union_optimizer_test.cpp", "logical_rewriter_optimizer_test.cpp", "optimizer_test.cpp", "physical_rewriter_optimizer_test.cpp", diff --git a/src/mongo/db/query/optimizer/bool_expression.h b/src/mongo/db/query/optimizer/bool_expression.h index 7e0371041b3..fb5a851781c 100644 --- a/src/mongo/db/query/optimizer/bool_expression.h +++ b/src/mongo/db/query/optimizer/bool_expression.h @@ -146,6 +146,10 @@ struct BoolExpr { return {}; } + static bool isSingularDNF(const Node& n) { + return getSingularDNF(n).has_value(); + } + using ChildVisitor = std::function<void(Node& child, const size_t childIndex)>; using ChildVisitorConst = std::function<void(const Node& child, const size_t childIndex)>; using AtomVisitor = std::function<void(T& expr)>; diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp index d0d02208bc1..17052b694c4 100644 --- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp +++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp @@ -566,7 +566,7 @@ static boost::optional<ABT> mergeSargableNodes( return createEmptyValueScanNode(ctx); } - if (mergedReqs.numLeaves() > SargableNode::kMaxPartialSchemaReqs) { + if (PSRExpr::numLeaves(mergedReqs.getRoot()) > SargableNode::kMaxPartialSchemaReqs) { return {}; } @@ -678,7 +678,7 @@ static void convertFilterToSargableNode(ABT::reference_type node, addEmptyValueScanNode(ctx); return; } - if (conversion->_reqMap.numLeaves() > SargableNode::kMaxPartialSchemaReqs) { + if (PSRExpr::numLeaves(conversion->_reqMap.getRoot()) > SargableNode::kMaxPartialSchemaReqs) { // Too many requirements. return; } @@ -1089,7 +1089,7 @@ struct SubstituteConvert<EvaluationNode> { uassert(6624165, "Should not be getting retainPredicate set for EvalNodes", !conversion->_retainPredicate); - if (conversion->_reqMap.numLeaves() != 1) { + if (PSRExpr::numLeaves(conversion->_reqMap.getRoot()) != 1) { // For evaluation nodes we expect to create a single entry. return; } @@ -1344,7 +1344,7 @@ struct ExploreConvert<SargableNode> { // try having at least one predicate on the left (mask = 1), and we try all possible // subsets. For index intersection however (isIndex = true), we try symmetric partitioning // (thus the high bound is 2^(N-1)). - const size_t reqSize = reqMap.numConjuncts(); + const size_t reqSize = PSRExpr::numLeaves(reqMap.getRoot()); // assumes singular DNF const size_t highMask = isIndex ? 
(1ull << (reqSize - 1)) : (1ull << reqSize); for (size_t mask = 1; mask < highMask; mask++) { SplitRequirementsResult splitResult = splitRequirements( diff --git a/src/mongo/db/query/optimizer/index_union_optimizer_test.cpp b/src/mongo/db/query/optimizer/index_union_optimizer_test.cpp new file mode 100644 index 00000000000..b3cc24fe192 --- /dev/null +++ b/src/mongo/db/query/optimizer/index_union_optimizer_test.cpp @@ -0,0 +1,594 @@ +/** + * Copyright (C) 2023-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/pipeline/abt/utils.h" +#include "mongo/db/query/optimizer/cascades/logical_props_derivation.h" +#include "mongo/db/query/optimizer/cascades/rewriter_rules.h" +#include "mongo/db/query/optimizer/explain.h" +#include "mongo/db/query/optimizer/metadata_factory.h" +#include "mongo/db/query/optimizer/node.h" +#include "mongo/db/query/optimizer/opt_phase_manager.h" +#include "mongo/db/query/optimizer/rewrites/const_eval.h" +#include "mongo/db/query/optimizer/utils/unit_test_abt_literals.h" +#include "mongo/db/query/optimizer/utils/unit_test_utils.h" +#include "mongo/unittest/unittest.h" + +using namespace mongo::optimizer::unit_test_abt_literals; + +namespace mongo::optimizer { +namespace { + +TEST(LogicalRewriter, MakeSargableNodeWithTopLevelDisjunction) { + // Hand-build SargableNode with top-level disjunction. 
+ auto req = PartialSchemaRequirement( + boost::none, _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))), false); + + auto makeKey = [](std::string pathName) { + return PartialSchemaKey("ptest", + make<PathGet>(FieldNameType{pathName}, make<PathIdentity>())); + }; + PSRExpr::Builder builder; + builder.pushDisj() + .pushConj() + .atom({makeKey("a"), req}) + .atom({makeKey("b"), req}) + .pop() + .pushConj() + .atom({makeKey("c"), req}) + .atom({makeKey("d"), req}) + .pop(); + auto reqs = PartialSchemaRequirements(builder.finish().get()); + + ABT scanNode = make<ScanNode>("ptest", "test"); + ABT sargableNode = make<SargableNode>( + reqs, CandidateIndexes(), boost::none, IndexReqTarget::Index, std::move(scanNode)); + ABT rootNode = make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"ptest"}}, + std::move(sargableNode)); + ASSERT_EXPLAIN_V2_AUTO( + "Root [{ptest}]\n" + "Sargable [Index]\n" + "| | | requirements: \n" + "| | | {\n" + "| | | {\n" + "| | | {refProjection: ptest, path: 'PathGet [a] PathIdentity []', " + "intervals: {{{=Const [1]}}}}\n" + "| | | ^ \n" + "| | | {refProjection: ptest, path: 'PathGet [b] PathIdentity []', " + "intervals: {{{=Const [1]}}}}\n" + "| | | }\n" + "| | | U \n" + "| | | {\n" + "| | | {refProjection: ptest, path: 'PathGet [c] PathIdentity []', " + "intervals: {{{=Const [1]}}}}\n" + "| | | ^ \n" + "| | | {refProjection: ptest, path: 'PathGet [d] PathIdentity []', " + "intervals: {{{=Const [1]}}}}\n" + "| | | }\n" + "| | | }\n" + "| | candidateIndexes: \n" + "Scan [test, {ptest}]\n", + rootNode); + + // Show that hashing a top-level disjunction doesn't throw. + ABTHashGenerator::generate(rootNode); +} + +TEST(LogicalRewriter, ToplevelDisjunctionConversion) { + // When we have a Filter with a top-level disjunction, + // it gets translated to a Sargable node with top-level disjunction. + + // {$or: [ {a: 2}, {b: 3} ]} + ABT rootNode = NodeBuilder{} + .root("scan_0") + .filter(_evalf(_composea(_get("a", _cmp("Eq", "2"_cint64)), + _get("b", _cmp("Eq", "3"_cint64))), + "scan_0"_var)) + .finish(_scan("scan_0", "coll")); + + auto prefixId = PrefixId::createForTests(); + auto phaseManager = makePhaseManager({OptPhase::MemoSubstitutionPhase}, + prefixId, + {{{"coll", createScanDef({}, {})}}}, + boost::none /*costModel*/, + DebugInfo::kDefaultForTests, + QueryHints{}); + + ABT optimized = rootNode; + phaseManager.optimize(optimized); + + ASSERT_EXPLAIN_V2_AUTO( + "Root [{scan_0}]\n" + "Sargable [Complete]\n" + "| | | | requirements: \n" + "| | | | {\n" + "| | | | {{refProjection: scan_0, path: 'PathGet [a] PathIdentity []', " + "intervals: {{{=Const [2]}}}}}\n" + "| | | | U \n" + "| | | | {{refProjection: scan_0, path: 'PathGet [b] PathIdentity []', " + "intervals: {{{=Const [3]}}}}}\n" + "| | | | }\n" + "| | | candidateIndexes: \n" + "| | scanParams: \n" + "| | {'a': evalTemp_0, 'b': evalTemp_1}\n" + "| | residualReqs: \n" + "| | {\n" + "| | {{refProjection: evalTemp_0, path: 'PathIdentity []', intervals: " + "{{{=Const [2]}}}, entryIndex: 0}}\n" + "| | U \n" + "| | {{refProjection: evalTemp_1, path: 'PathIdentity []', intervals: " + "{{{=Const [3]}}}, entryIndex: 1}}\n" + "| | }\n" + "Scan [coll, {scan_0}]\n", + optimized); +} + +TEST(LogicalRewriter, ToplevelNestedDisjunctionConversion) { + // When we have a Filter with a top-level disjunction, + // it gets translated to a Sargable node with top-level disjunction, + // even if it's a nested disjunction. + + // {$or: [{$or: [{a: 2}. 
{b: 3}]}, {$or: [{c: 4}, {b: 5}]}]} + ABT rootNode = NodeBuilder{} + .root("scan_0") + .filter(_evalf(_composea(_composea(_get("a", _cmp("Eq", "2"_cint64)), + _get("b", _cmp("Eq", "3"_cint64))), + _composea(_get("c", _cmp("Eq", "4"_cint64)), + _get("d", _cmp("Eq", "5"_cint64)))), + "scan_0"_var)) + .finish(_scan("scan_0", "coll")); + + auto prefixId = PrefixId::createForTests(); + auto phaseManager = makePhaseManager({OptPhase::MemoSubstitutionPhase}, + prefixId, + {{{"coll", createScanDef({}, {})}}}, + boost::none /*costModel*/, + DebugInfo::kDefaultForTests, + QueryHints{}); + + ABT optimized = rootNode; + phaseManager.optimize(optimized); + + ASSERT_EXPLAIN_V2_AUTO( + "Root [{scan_0}]\n" + "Sargable [Complete]\n" + "| | | | requirements: \n" + "| | | | {\n" + "| | | | {{refProjection: scan_0, path: 'PathGet [a] PathIdentity []', " + "intervals: {{{=Const [2]}}}}}\n" + "| | | | U \n" + "| | | | {{refProjection: scan_0, path: 'PathGet [b] PathIdentity []', " + "intervals: {{{=Const [3]}}}}}\n" + "| | | | U \n" + "| | | | {{refProjection: scan_0, path: 'PathGet [c] PathIdentity []', " + "intervals: {{{=Const [4]}}}}}\n" + "| | | | U \n" + "| | | | {{refProjection: scan_0, path: 'PathGet [d] PathIdentity []', " + "intervals: {{{=Const [5]}}}}}\n" + "| | | | }\n" + "| | | candidateIndexes: \n" + "| | scanParams: \n" + "| | {'a': evalTemp_0, 'b': evalTemp_1, 'c': evalTemp_2, 'd': evalTemp_3}\n" + "| | residualReqs: \n" + "| | {\n" + "| | {{refProjection: evalTemp_0, path: 'PathIdentity []', intervals: " + "{{{=Const [2]}}}, entryIndex: 0}}\n" + "| | U \n" + "| | {{refProjection: evalTemp_1, path: 'PathIdentity []', intervals: " + "{{{=Const [3]}}}, entryIndex: 1}}\n" + "| | U \n" + "| | {{refProjection: evalTemp_2, path: 'PathIdentity []', intervals: " + "{{{=Const [4]}}}, entryIndex: 2}}\n" + "| | U \n" + "| | {{refProjection: evalTemp_3, path: 'PathIdentity []', intervals: " + "{{{=Const [5]}}}, entryIndex: 3}}\n" + "| | }\n" + "Scan [coll, {scan_0}]\n", + optimized); +} + +TEST(LogicalRewriter, ComplexBooleanConversion) { + + auto leaf0 = _get("a", _cmp("Eq", "0"_cint64)); + auto leaf1 = _get("b", _cmp("Eq", "1"_cint64)); + auto leaf2 = _get("c", _cmp("Eq", "2"_cint64)); + auto leaf3 = _get("d", _cmp("Eq", "3"_cint64)); + auto leaf4 = _get("e", _cmp("Eq", "4"_cint64)); + auto leaf5 = _get("f", _cmp("Eq", "5"_cint64)); + auto path = _composem( + leaf0, _composea(leaf1, _composem(leaf2, _composea(leaf3, _composem(leaf4, leaf5))))); + ABT rootNode = NodeBuilder{} + .root("scan_0") + .filter(_evalf(path, "scan_0"_var)) + .finish(_scan("scan_0", "coll")); + + auto prefixId = PrefixId::createForTests(); + auto phaseManager = makePhaseManager({OptPhase::MemoSubstitutionPhase}, + prefixId, + {{{"coll", createScanDef({}, {})}}}, + boost::none /*costModel*/, + DebugInfo::kDefaultForTests, + QueryHints{}); + + ABT optimized = rootNode; + phaseManager.optimize(optimized); + + // For now PSR conversion fails because the result would not be DNF. 
+ ASSERT_EXPLAIN_V2_AUTO( + "Root [{scan_0}]\n" + "Filter []\n" + "| EvalFilter []\n" + "| | Variable [scan_0]\n" + "| PathComposeA []\n" + "| | PathComposeM []\n" + "| | | PathComposeA []\n" + "| | | | PathComposeM []\n" + "| | | | | PathGet [f]\n" + "| | | | | PathCompare [Eq]\n" + "| | | | | Const [5]\n" + "| | | | PathGet [e]\n" + "| | | | PathCompare [Eq]\n" + "| | | | Const [4]\n" + "| | | PathGet [d]\n" + "| | | PathCompare [Eq]\n" + "| | | Const [3]\n" + "| | PathGet [c]\n" + "| | PathCompare [Eq]\n" + "| | Const [2]\n" + "| PathGet [b]\n" + "| PathCompare [Eq]\n" + "| Const [1]\n" + "Sargable [Complete]\n" + "| | | | requirements: \n" + "| | | | {{{refProjection: scan_0, path: 'PathGet [a] PathIdentity []', " + "intervals: {{{=Const [0]}}}}}}\n" + "| | | candidateIndexes: \n" + "| | scanParams: \n" + "| | {'a': evalTemp_0}\n" + "| | residualReqs: \n" + "| | {{{refProjection: evalTemp_0, path: 'PathIdentity []', intervals: " + "{{{=Const [0]}}}, entryIndex: 0}}}\n" + "Scan [coll, {scan_0}]\n", + optimized); +} + +TEST(LogicalRewriter, DisjunctionProjectionConversion) { + + auto leaf0 = _get("a", _cmp("Eq", "0"_cint64)); + auto leaf1 = _get("b", _cmp("Eq", "1"_cint64)); + auto path = _composea(leaf0, leaf1); + ABT rootNode = NodeBuilder{} + .root("doc") + .eval("doc", _evalp(_keep(FieldNameType{"x"}), "scan_0"_var)) + .filter(_evalf(path, "scan_0"_var)) + .finish(_scan("scan_0", "coll")); + + auto prefixId = PrefixId::createForTests(); + auto phaseManager = makePhaseManager({OptPhase::MemoSubstitutionPhase}, + prefixId, + {{{"coll", createScanDef({}, {})}}}, + boost::none /*costModel*/, + DebugInfo::kDefaultForTests, + QueryHints{}); + + ABT optimized = rootNode; + phaseManager.optimize(optimized); + + // We get two Sargable nodes, but they aren't combined, because converting to DNF would + // distribute the projection into both disjuncts, and for now we don't want to have + // projections inside a (nontrivial) disjunction. 
+ ASSERT_EXPLAIN_V2_AUTO( + "Root [{doc}]\n" + "Evaluation [{doc}]\n" + "| EvalPath []\n" + "| | Const [{}]\n" + "| PathField [x]\n" + "| PathConstant []\n" + "| Variable [fieldProj_0]\n" + "Sargable [Complete]\n" + "| | | | requirements: \n" + "| | | | {{{refProjection: scan_0, path: 'PathGet [x] PathIdentity []', " + "boundProjection: fieldProj_0, intervals: {{{<fully open>}}}}}}\n" + "| | | candidateIndexes: \n" + "| | scanParams: \n" + "| | {'x': fieldProj_0}\n" + "Sargable [Complete]\n" + "| | | | requirements: \n" + "| | | | {\n" + "| | | | {{refProjection: scan_0, path: 'PathGet [a] PathIdentity []', " + "intervals: {{{=Const [0]}}}}}\n" + "| | | | U \n" + "| | | | {{refProjection: scan_0, path: 'PathGet [b] PathIdentity []', " + "intervals: {{{=Const [1]}}}}}\n" + "| | | | }\n" + "| | | candidateIndexes: \n" + "| | scanParams: \n" + "| | {'a': evalTemp_0, 'b': evalTemp_1}\n" + "| | residualReqs: \n" + "| | {\n" + "| | {{refProjection: evalTemp_0, path: 'PathIdentity []', intervals: " + "{{{=Const [0]}}}, entryIndex: 0}}\n" + "| | U \n" + "| | {{refProjection: evalTemp_1, path: 'PathIdentity []', intervals: " + "{{{=Const [1]}}}, entryIndex: 1}}\n" + "| | }\n" + "Scan [coll, {scan_0}]\n", + optimized); +} + +TEST(LogicalRewriter, DisjunctionConversionDedup) { + + auto leaf0 = _get("a", _cmp("Eq", "0"_cint64)); + auto leaf1 = _get("b", _cmp("Eq", "1"_cint64)); + auto path = _composea(_composea(leaf0, leaf1), _composea(leaf0, leaf0)); + ABT rootNode = NodeBuilder{} + .root("scan_0") + .filter(_evalf(path, "scan_0"_var)) + .finish(_scan("scan_0", "coll")); + + auto prefixId = PrefixId::createForTests(); + auto phaseManager = makePhaseManager({OptPhase::MemoSubstitutionPhase}, + prefixId, + {{{"coll", createScanDef({}, {})}}}, + boost::none /*costModel*/, + DebugInfo::kDefaultForTests, + QueryHints{}); + + ABT optimized = rootNode; + phaseManager.optimize(optimized); + + // We should see everything get reordered and deduped, + // so each of the leaf predicates appears once. + // TODO SERVER-73827 We should get 2 leaf predicates instead of 3 here. 
+ ASSERT_EXPLAIN_V2_AUTO( + "Root [{scan_0}]\n" + "Sargable [Complete]\n" + "| | | | requirements: \n" + "| | | | {\n" + "| | | | {{refProjection: scan_0, path: 'PathGet [a] PathIdentity []', " + "intervals: {{{=Const [0]}}}}}\n" + "| | | | U \n" + "| | | | {{refProjection: scan_0, path: 'PathGet [a] PathIdentity []', " + "intervals: {{{=Const [0]}}}}}\n" + "| | | | U \n" + "| | | | {{refProjection: scan_0, path: 'PathGet [b] PathIdentity []', " + "intervals: {{{=Const [1]}}}}}\n" + "| | | | }\n" + "| | | candidateIndexes: \n" + "| | scanParams: \n" + "| | {'a': evalTemp_0, 'b': evalTemp_1}\n" + "| | residualReqs: \n" + "| | {\n" + "| | {{refProjection: evalTemp_0, path: 'PathIdentity []', intervals: " + "{{{=Const [0]}}}, entryIndex: 0}}\n" + "| | U \n" + "| | {{refProjection: evalTemp_0, path: 'PathIdentity []', intervals: " + "{{{=Const [0]}}}, entryIndex: 1}}\n" + "| | U \n" + "| | {{refProjection: evalTemp_1, path: 'PathIdentity []', intervals: " + "{{{=Const [1]}}}, entryIndex: 2}}\n" + "| | }\n" + "Scan [coll, {scan_0}]\n", + optimized); +} + +TEST(PhysRewriter, LowerRequirementsWithTopLevelDisjunction) { + auto req = + PartialSchemaRequirement(boost::none, + _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))), + false /*perfOnly*/); + + auto makeKey = [](std::string pathName) { + return PartialSchemaKey("ptest", + make<PathGet>(FieldNameType{pathName}, make<PathIdentity>())); + }; + + CEType scanGroupCE{10.0}; + FieldProjectionMap fieldProjectionMap; + fieldProjectionMap._rootProjection = "ptest"; + std::vector<SelectivityType> indexPredSels; + + PhysPlanBuilder builder; + builder.make<PhysicalScanNode>( + scanGroupCE, fieldProjectionMap, "test" /* scanDefName */, false /* parallelScan */); + + ResidualRequirementsWithOptionalCE::Builder residReqsBuilder; + residReqsBuilder.pushDisj() + .pushConj() + .atom({makeKey("a"), req, CEType{2.0}}) + .atom({makeKey("b"), req, CEType{3.0}}) + .pop() + .pushConj() + .atom({makeKey("c"), req, CEType{5.0}}) + .atom({makeKey("d"), req, CEType{4.0}}) + .pop(); + auto residReqs = residReqsBuilder.finish().get(); + lowerPartialSchemaRequirements( + scanGroupCE, indexPredSels, residReqs, defaultConvertPathToInterval, builder); + + ASSERT_EXPLAIN_V2_AUTO( + "Filter []\n" + "| BinaryOp [Or]\n" + "| | BinaryOp [And]\n" + "| | | EvalFilter []\n" + "| | | | Variable [ptest]\n" + "| | | PathGet [c]\n" + "| | | PathCompare [Eq]\n" + "| | | Const [1]\n" + "| | EvalFilter []\n" + "| | | Variable [ptest]\n" + "| | PathGet [d]\n" + "| | PathCompare [Eq]\n" + "| | Const [1]\n" + "| BinaryOp [And]\n" + "| | EvalFilter []\n" + "| | | Variable [ptest]\n" + "| | PathGet [b]\n" + "| | PathCompare [Eq]\n" + "| | Const [1]\n" + "| EvalFilter []\n" + "| | Variable [ptest]\n" + "| PathGet [a]\n" + "| PathCompare [Eq]\n" + "| Const [1]\n" + "PhysicalScan [{'<root>': ptest}, test]\n", + builder._node); +} + +TEST(PhysRewriter, OptimizeSargableNodeWithTopLevelDisjunction) { + auto req = + PartialSchemaRequirement(boost::none, + _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))), + false /*perfOnly*/); + + auto makeKey = [](std::string pathName) { + return PartialSchemaKey("ptest", + make<PathGet>(FieldNameType{pathName}, make<PathIdentity>())); + }; + + // Create three SargableNodes with top-level disjunctions. 
+ PSRExpr::Builder builder; + builder.pushDisj() + .pushConj() + .atom({makeKey("a"), req}) + .atom({makeKey("b"), req}) + .pop() + .pushConj() + .atom({makeKey("c"), req}) + .atom({makeKey("d"), req}) + .pop(); + auto reqs1 = PartialSchemaRequirements(builder.finish().get()); + + builder.pushDisj() + .pushConj() + .atom({makeKey("e"), req}) + .pop() + .pushConj() + .atom({makeKey("f"), req}) + .pop(); + auto reqs2 = PartialSchemaRequirements(builder.finish().get()); + + builder.pushDisj().pushConj().atom({makeKey("g"), req}).pop(); + auto reqs3 = PartialSchemaRequirements(builder.finish().get()); + + // During logical optimization, the SargableNodes not directly above the Scan will first be + // lowered to Filter nodes based on their requirements. The SargableNode immediately above the + // Scan will be lowered later based on its residual requirements. + ResidualRequirements::Builder residReqs; + residReqs.pushDisj() + .pushConj() + .atom({makeKey("a"), req, 0}) + .atom({makeKey("b"), req, 1}) + .pop() + .pushConj() + .atom({makeKey("c"), req, 2}) + .atom({makeKey("d"), req, 3}) + .pop(); + ScanParams scanParams; + scanParams._residualRequirements = residReqs.finish(); + + ABT scanNode = make<ScanNode>("ptest", "test"); + ABT sargableNode1 = make<SargableNode>( + reqs1, CandidateIndexes(), scanParams, IndexReqTarget::Index, std::move(scanNode)); + ABT sargableNode2 = make<SargableNode>( + reqs2, CandidateIndexes(), boost::none, IndexReqTarget::Index, std::move(sargableNode1)); + ABT sargableNode3 = make<SargableNode>( + reqs3, CandidateIndexes(), boost::none, IndexReqTarget::Index, std::move(sargableNode2)); + ABT rootNode = make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"ptest"}}, + std::move(sargableNode3)); + + // Show that the optimization of the SargableNode does not throw, and that all three + // SargableNodes are correctly lowered to FilterNodes. + auto prefixId = PrefixId::createForTests(); + auto phaseManager = makePhaseManager( + {OptPhase::MemoSubstitutionPhase, + OptPhase::MemoExplorationPhase, + OptPhase::MemoImplementationPhase}, + prefixId, + {{{"test", + createScanDef( + {}, + { + // For now, verify that we do not get an indexed plan even when there + // are indexes available on the queried fields. 
+ {"ab", + IndexDefinition{{{makeIndexPath("a"), CollationOp::Ascending}, + {makeIndexPath("b"), CollationOp::Ascending}}, + false /*isMultiKey*/, + {DistributionType::Centralized}, + {}}}, + {"cd", + IndexDefinition{{{makeIndexPath("c"), CollationOp::Ascending}, + {makeIndexPath("d"), CollationOp::Ascending}}, + false /*isMultiKey*/, + {DistributionType::Centralized}, + {}}}, + {"e", makeIndexDefinition("e", CollationOp::Ascending, false /*isMultiKey*/)}, + {"f", makeIndexDefinition("f", CollationOp::Ascending, false /*isMultiKey*/)}, + {"g", makeIndexDefinition("g", CollationOp::Ascending, false /*isMultiKey*/)}, + })}}}, + boost::none /*costModel*/, + DebugInfo::kDefaultForTests); + phaseManager.optimize(rootNode); + + ASSERT_EXPLAIN_V2Compact_AUTO( + "Root [{ptest}]\n" + "Filter []\n" + "| EvalFilter []\n" + "| | Variable [ptest]\n" + "| PathGet [g] PathCompare [Eq] Const [1]\n" + "Filter []\n" + "| BinaryOp [Or]\n" + "| | EvalFilter []\n" + "| | | Variable [ptest]\n" + "| | PathGet [f] PathCompare [Eq] Const [1]\n" + "| EvalFilter []\n" + "| | Variable [ptest]\n" + "| PathGet [e] PathCompare [Eq] Const [1]\n" + "Filter []\n" + "| BinaryOp [Or]\n" + "| | BinaryOp [And]\n" + "| | | EvalFilter []\n" + "| | | | Variable [ptest]\n" + "| | | PathGet [d] PathCompare [Eq] Const [1]\n" + "| | EvalFilter []\n" + "| | | Variable [ptest]\n" + "| | PathGet [c] PathCompare [Eq] Const [1]\n" + "| BinaryOp [And]\n" + "| | EvalFilter []\n" + "| | | Variable [ptest]\n" + "| | PathGet [b] PathCompare [Eq] Const [1]\n" + "| EvalFilter []\n" + "| | Variable [ptest]\n" + "| PathGet [a] PathCompare [Eq] Const [1]\n" + "PhysicalScan [{'<root>': ptest}, test]\n", + rootNode); +} + +} // namespace +} // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp index a66cb8b81a3..3947ee71d87 100644 --- a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp +++ b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp @@ -38,6 +38,7 @@ #include "mongo/db/query/optimizer/utils/unit_test_utils.h" #include "mongo/unittest/unittest.h" +using namespace mongo::optimizer::unit_test_abt_literals; namespace mongo::optimizer { namespace { @@ -1555,7 +1556,6 @@ TEST(LogicalRewriter, NotPushdownUnderLambdaKeepOuterTraverse) { } TEST(LogicalRewriter, NotPushdownUnderLambdaFailsWithFreeVar) { - using namespace mongo::optimizer::unit_test_abt_literals; // When we eliminate a Not, we can't eliminate the Lambda [x] if it would leave free // occurrences of 'x'. @@ -2027,61 +2027,5 @@ TEST(LogicalRewriter, EmptyArrayIndexBounds) { rootNode); } -TEST(LogicalRewriter, MakeSargableNodeWithTopLevelDisjunction) { - using namespace unit_test_abt_literals; - - // Hand-build SargableNode with top-level disjunction. 
- auto req = PartialSchemaRequirement( - boost::none, _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))), false); - - auto makeKey = [](std::string pathName) { - return PartialSchemaKey("ptest", - make<PathGet>(FieldNameType{pathName}, make<PathIdentity>())); - }; - PSRExpr::Builder builder; - builder.pushDisj() - .pushConj() - .atom({makeKey("a"), req}) - .atom({makeKey("b"), req}) - .pop() - .pushConj() - .atom({makeKey("c"), req}) - .atom({makeKey("d"), req}) - .pop(); - auto reqs = PartialSchemaRequirements(builder.finish().get()); - - ABT scanNode = make<ScanNode>("ptest", "test"); - ABT sargableNode = make<SargableNode>( - reqs, CandidateIndexes(), boost::none, IndexReqTarget::Index, std::move(scanNode)); - ABT rootNode = make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"ptest"}}, - std::move(sargableNode)); - ASSERT_EXPLAIN_V2_AUTO( - "Root [{ptest}]\n" - "Sargable [Index]\n" - "| | | requirements: \n" - "| | | {\n" - "| | | {\n" - "| | | {refProjection: ptest, path: 'PathGet [a] PathIdentity []', " - "intervals: {{{=Const [1]}}}}\n" - "| | | ^ \n" - "| | | {refProjection: ptest, path: 'PathGet [b] PathIdentity []', " - "intervals: {{{=Const [1]}}}}\n" - "| | | }\n" - "| | | U \n" - "| | | {\n" - "| | | {refProjection: ptest, path: 'PathGet [c] PathIdentity []', " - "intervals: {{{=Const [1]}}}}\n" - "| | | ^ \n" - "| | | {refProjection: ptest, path: 'PathGet [d] PathIdentity []', " - "intervals: {{{=Const [1]}}}}\n" - "| | | }\n" - "| | | }\n" - "| | candidateIndexes: \n" - "Scan [test, {ptest}]\n", - rootNode); - - // Show that hashing a top-level disjunction doesn't throw. - ABTHashGenerator::generate(rootNode); -} } // namespace } // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/node.cpp b/src/mongo/db/query/optimizer/node.cpp index 99b48fb0970..e4e8892119e 100644 --- a/src/mongo/db/query/optimizer/node.cpp +++ b/src/mongo/db/query/optimizer/node.cpp @@ -405,9 +405,12 @@ SargableNode::SargableNode(PartialSchemaRequirements reqMap, tassert(6624085, "SargableNode requires at least one predicate", !_reqMap.isNoop()); tassert( 7447500, "SargableNode requirements should be in DNF", PSRExpr::isDNF(_reqMap.getRoot())); - tassert(6624086, - str::stream() << "SargableNode has too many predicates: " << _reqMap.numLeaves(), - _reqMap.numLeaves() <= kMaxPartialSchemaReqs); + if (const size_t numLeaves = PSRExpr::numLeaves(_reqMap.getRoot()); + numLeaves > kMaxPartialSchemaReqs) { + tasserted(6624086, + str::stream() << "SargableNode has too many predicates: " << numLeaves + << ". We allow at most " << kMaxPartialSchemaReqs); + } auto bindings = createSargableBindings(_reqMap); tassert(7410100, diff --git a/src/mongo/db/query/optimizer/partial_schema_requirements.cpp b/src/mongo/db/query/optimizer/partial_schema_requirements.cpp index 0659675ccee..40b54d4d61d 100644 --- a/src/mongo/db/query/optimizer/partial_schema_requirements.cpp +++ b/src/mongo/db/query/optimizer/partial_schema_requirements.cpp @@ -64,26 +64,12 @@ private: } }; -// Make a BoolExpr representing a conjunction of the entries. It will be an OR of a single AND. -PSRExpr::Node makePSRExpr(std::vector<PartialSchemaEntry> entries) { - PSRExpr::Builder b; - b.pushDisj().pushConj(); - for (auto& entry : entries) { - b.atom(std::move(entry)); - } - - auto res = b.finish(); - tassert(7016402, "PartialSchemaRequirements could not be constructed", res.has_value()); - return res.get(); -} - // A no-op entry has a default key and a requirement that is fully open and does not bind. 
PartialSchemaEntry makeNoopPartialSchemaEntry() { return {PartialSchemaKey(), PartialSchemaRequirement( boost::none /*boundProjectionName*/, - IntervalReqExpr::makeSingularDNF(IntervalRequirement( - BoundRequirement::makeMinusInf(), BoundRequirement::makePlusInf())), + IntervalReqExpr::makeSingularDNF(IntervalRequirement{/*fully open*/}), false /*isPerfOnly*/)}; } } // namespace @@ -92,9 +78,6 @@ void PartialSchemaRequirements::normalize() { PSRNormalizeTransporter{}.normalize(_expr); } -PartialSchemaRequirements::PartialSchemaRequirements(std::vector<Entry> entries) - : PartialSchemaRequirements(makePSRExpr(entries)) {} - PartialSchemaRequirements::PartialSchemaRequirements(PSRExpr::Node requirements) : _expr(std::move(requirements)) { tassert(7016403, @@ -113,7 +96,7 @@ bool PartialSchemaRequirements::operator==(const PartialSchemaRequirements& othe bool PartialSchemaRequirements::isNoop() const { // A PartialSchemaRequirements is a no-op if it has exactly zero predicates/projections... - auto numPreds = numLeaves(); + const size_t numPreds = PSRExpr::numLeaves(getRoot()); if (numPreds == 0) { return true; } else if (numPreds > 1) { @@ -135,14 +118,6 @@ bool PartialSchemaRequirements::isNoop() const { return reqIsNoop; } -size_t PartialSchemaRequirements::numLeaves() const { - return PSRExpr::numLeaves(_expr); -} - -size_t PartialSchemaRequirements::numConjuncts() const { - return numLeaves(); -} - boost::optional<ProjectionName> PartialSchemaRequirements::findProjection( const PartialSchemaKey& key) const { tassert(7453908, @@ -177,6 +152,8 @@ PartialSchemaRequirements::findFirstConjunct(const PartialSchemaKey& key) const void PartialSchemaRequirements::add(PartialSchemaKey key, PartialSchemaRequirement req) { tassert(7016406, "Expected a PartialSchemaRequirements in DNF form", PSRExpr::isDNF(_expr)); + // TODO SERVER-69026 Consider applying the distributive law. + tassert(7453912, "Expected a singleton disjunction", PSRExpr::isSingletonDisjunction(_expr)); // Add an entry to the first conjunction PSRExpr::visitDisjuncts(_expr, [&](PSRExpr::Node& disjunct, const size_t i) { diff --git a/src/mongo/db/query/optimizer/partial_schema_requirements.h b/src/mongo/db/query/optimizer/partial_schema_requirements.h index fb84db887fd..47feba141e8 100644 --- a/src/mongo/db/query/optimizer/partial_schema_requirements.h +++ b/src/mongo/db/query/optimizer/partial_schema_requirements.h @@ -123,11 +123,6 @@ public: PartialSchemaRequirements(PSRExpr::Node requirements); - // TODO SERVER-74539: remove these contructors. - PartialSchemaRequirements(std::vector<Entry>); - PartialSchemaRequirements(std::initializer_list<Entry> entries) - : PartialSchemaRequirements(std::vector<Entry>(entries)) {} - bool operator==(const PartialSchemaRequirements& other) const; /** @@ -137,17 +132,6 @@ public: bool isNoop() const; /** - * Return the number of PartialSchemaEntries. - */ - size_t numLeaves() const; - - /** - * Return the number of Disjunctions under a top-level Conjunction. - * TODO SERVER-69026 Remove or clarify this method. - */ - size_t numConjuncts() const; - - /** * Return the bound projection name corresponding to the first conjunct matching the given key. * Assert on non-DNF requirements. */ @@ -187,8 +171,14 @@ public: } /** - * Add an entry to the first AND under a top-level OR. Asserts on non-DNF requirements. - * TODO SERVER-74539, remove or clarify this method. + * Conjunctively combine 'this' with another PartialSchemaRequirement. + * Asserts that 'this' is in DNF. 
+ * + * For now, we assert that we have only one disjunct. This means we avoid applying + * the distributive law, which would duplicate the new requirement into each disjunct. + * + * TODO SERVER-69026 Consider applying the distributive law to allow contained-OR in + * SargableNode. */ void add(PartialSchemaKey, PartialSchemaRequirement); diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp index 8057ae362b7..18cc3052610 100644 --- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp +++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp @@ -5398,190 +5398,5 @@ TEST(PhysRewriter, ExtractAllPlans) { "| minKey], Const [1 | maxKey]]}]\n", getExplainForPlan(2)); } - -TEST(PhysRewriter, LowerRequirementsWithTopLevelDisjunction) { - auto req = - PartialSchemaRequirement(boost::none, - _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))), - false /*perfOnly*/); - - auto makeKey = [](std::string pathName) { - return PartialSchemaKey("ptest", - make<PathGet>(FieldNameType{pathName}, make<PathIdentity>())); - }; - - CEType scanGroupCE{10.0}; - FieldProjectionMap fieldProjectionMap; - fieldProjectionMap._rootProjection = "ptest"; - std::vector<SelectivityType> indexPredSels; - - PhysPlanBuilder builder; - builder.make<PhysicalScanNode>( - scanGroupCE, fieldProjectionMap, "test" /* scanDefName */, false /* parallelScan */); - - ResidualRequirementsWithOptionalCE::Builder residReqsBuilder; - residReqsBuilder.pushDisj() - .pushConj() - .atom({makeKey("a"), req, CEType{2.0}}) - .atom({makeKey("b"), req, CEType{3.0}}) - .pop() - .pushConj() - .atom({makeKey("c"), req, CEType{5.0}}) - .atom({makeKey("d"), req, CEType{4.0}}) - .pop(); - auto residReqs = residReqsBuilder.finish().get(); - lowerPartialSchemaRequirements( - scanGroupCE, indexPredSels, residReqs, defaultConvertPathToInterval, builder); - - ASSERT_EXPLAIN_V2_AUTO( - "Filter []\n" - "| BinaryOp [Or]\n" - "| | BinaryOp [And]\n" - "| | | EvalFilter []\n" - "| | | | Variable [ptest]\n" - "| | | PathGet [c]\n" - "| | | PathCompare [Eq]\n" - "| | | Const [1]\n" - "| | EvalFilter []\n" - "| | | Variable [ptest]\n" - "| | PathGet [d]\n" - "| | PathCompare [Eq]\n" - "| | Const [1]\n" - "| BinaryOp [And]\n" - "| | EvalFilter []\n" - "| | | Variable [ptest]\n" - "| | PathGet [b]\n" - "| | PathCompare [Eq]\n" - "| | Const [1]\n" - "| EvalFilter []\n" - "| | Variable [ptest]\n" - "| PathGet [a]\n" - "| PathCompare [Eq]\n" - "| Const [1]\n" - "PhysicalScan [{'<root>': ptest}, test]\n", - builder._node); -} - -TEST(PhysRewriter, OptimizeSargableNodeWithTopLevelDisjunction) { - auto req = - PartialSchemaRequirement(boost::none, - _disj(_conj(_interval(_incl("1"_cint32), _incl("1"_cint32)))), - false /*perfOnly*/); - - auto makeKey = [](std::string pathName) { - return PartialSchemaKey("ptest", - make<PathGet>(FieldNameType{pathName}, make<PathIdentity>())); - }; - - // Create three SargableNodes with top-level disjunctions. 
- PSRExpr::Builder builder; - builder.pushDisj() - .pushConj() - .atom({makeKey("a"), req}) - .atom({makeKey("b"), req}) - .pop() - .pushConj() - .atom({makeKey("c"), req}) - .atom({makeKey("d"), req}) - .pop(); - auto reqs1 = PartialSchemaRequirements(builder.finish().get()); - - builder.pushDisj() - .pushConj() - .atom({makeKey("e"), req}) - .pop() - .pushConj() - .atom({makeKey("f"), req}) - .pop(); - auto reqs2 = PartialSchemaRequirements(builder.finish().get()); - - builder.pushDisj().pushConj().atom({makeKey("g"), req}).pop(); - auto reqs3 = PartialSchemaRequirements(builder.finish().get()); - - // During logical optimization, the SargableNodes not directly above the Scan will first be - // lowered to Filter nodes based on their requirements. The SargableNode immediately above the - // Scan will be lowered later based on its residual requirements. - ResidualRequirements::Builder residReqs; - residReqs.pushDisj() - .pushConj() - .atom({makeKey("a"), req, 0}) - .atom({makeKey("b"), req, 1}) - .pop() - .pushConj() - .atom({makeKey("c"), req, 2}) - .atom({makeKey("d"), req, 3}) - .pop(); - ScanParams scanParams; - scanParams._residualRequirements = residReqs.finish(); - - ABT scanNode = make<ScanNode>("ptest", "test"); - ABT sargableNode1 = make<SargableNode>( - reqs1, CandidateIndexes(), scanParams, IndexReqTarget::Index, std::move(scanNode)); - ABT sargableNode2 = make<SargableNode>( - reqs2, CandidateIndexes(), boost::none, IndexReqTarget::Index, std::move(sargableNode1)); - ABT sargableNode3 = make<SargableNode>( - reqs3, CandidateIndexes(), boost::none, IndexReqTarget::Index, std::move(sargableNode2)); - ABT rootNode = make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"ptest"}}, - std::move(sargableNode3)); - - // Show that the optimization of the SargableNode does not throw, and that all three - // SargableNodes are correctly lowered to FilterNodes. 
- auto prefixId = PrefixId::createForTests(); - auto phaseManager = makePhaseManager({OptPhase::MemoSubstitutionPhase, - OptPhase::MemoExplorationPhase, - OptPhase::MemoImplementationPhase}, - prefixId, - {{{"test", ScanDefinition()}}}, - boost::none /*costModel*/, - DebugInfo::kDefaultForTests); - phaseManager.optimize(rootNode); - - ASSERT_EXPLAIN_V2_AUTO( - "Root [{ptest}]\n" - "Filter []\n" - "| EvalFilter []\n" - "| | Variable [ptest]\n" - "| PathGet [g]\n" - "| PathCompare [Eq]\n" - "| Const [1]\n" - "Filter []\n" - "| BinaryOp [Or]\n" - "| | EvalFilter []\n" - "| | | Variable [ptest]\n" - "| | PathGet [f]\n" - "| | PathCompare [Eq]\n" - "| | Const [1]\n" - "| EvalFilter []\n" - "| | Variable [ptest]\n" - "| PathGet [e]\n" - "| PathCompare [Eq]\n" - "| Const [1]\n" - "Filter []\n" - "| BinaryOp [Or]\n" - "| | BinaryOp [And]\n" - "| | | EvalFilter []\n" - "| | | | Variable [ptest]\n" - "| | | PathGet [d]\n" - "| | | PathCompare [Eq]\n" - "| | | Const [1]\n" - "| | EvalFilter []\n" - "| | | Variable [ptest]\n" - "| | PathGet [c]\n" - "| | PathCompare [Eq]\n" - "| | Const [1]\n" - "| BinaryOp [And]\n" - "| | EvalFilter []\n" - "| | | Variable [ptest]\n" - "| | PathGet [b]\n" - "| | PathCompare [Eq]\n" - "| | Const [1]\n" - "| EvalFilter []\n" - "| | Variable [ptest]\n" - "| PathGet [a]\n" - "| PathCompare [Eq]\n" - "| Const [1]\n" - "PhysicalScan [{'<root>': ptest}, test]\n", - rootNode); -} } // namespace } // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp index be43d300c6e..145784ae905 100644 --- a/src/mongo/db/query/optimizer/utils/utils.cpp +++ b/src/mongo/db/query/optimizer/utils/utils.cpp @@ -278,39 +278,43 @@ public: leftResult->_hasIntersected = true; return leftResult; } - // Additive composition: we never have projections in this case; only predicates. - for (const auto& [key, req] : leftReqMap.conjuncts()) { - tassert(7155021, - "Unexpected binding in ComposeA in PartialSchemaReqConverter", - !req.getBoundProjectionName()); - } - for (const auto& [key, req] : rightReqMap.conjuncts()) { - tassert(7155022, - "Unexpected binding in ComposeA in PartialSchemaReqConverter", - !req.getBoundProjectionName()); - } + // From this point on we only handle additive composition. + + // Check if the left and right requirements are all or none perf-only. + size_t perfOnlyCount = 0; + for (const auto* reqs : {&leftReqMap, &rightReqMap}) { + PSRExpr::visitAnyShape(reqs->getRoot(), [&](const PartialSchemaEntry& e) { + const auto& [key, req] = e; + // Additive composition should only have predicates; no projections. + tassert(7155021, + "Unexpected binding in ComposeA in PartialSchemaReqConverter", + !req.getBoundProjectionName()); - { - // Check if the left and right requirements are all or none perf-only. - size_t perfOnlyCount = 0; - for (const auto& [key, req] : leftReqMap.conjuncts()) { - if (req.getIsPerfOnly()) { - perfOnlyCount++; - } - } - for (const auto& [key, req] : rightReqMap.conjuncts()) { if (req.getIsPerfOnly()) { perfOnlyCount++; } - } - if (perfOnlyCount != 0 && - perfOnlyCount != leftReqMap.numLeaves() + rightReqMap.numLeaves()) { - // For now allow only predicates with the same perf-only flag. - return {}; - } + }); + } + if (perfOnlyCount != 0 && + perfOnlyCount != + PSRExpr::numLeaves(leftReqMap.getRoot()) + + PSRExpr::numLeaves(rightReqMap.getRoot())) { + // For now allow only predicates with the same perf-only flag. 
+ return {}; + } + + if (ResultType sameFieldDisj = createSameFieldDisjunction(leftResult, rightResult)) { + return sameFieldDisj; } - return createSameFieldDisjunction(leftResult, rightResult); + // Handle a general disjunction. + auto result = PartialSchemaReqConversion{ + unionPartialSchemaReq(std::move(leftReqMap), std::move(rightReqMap))}; + if (leftResult->_retainPredicate || rightResult->_retainPredicate) { + // If either argument is an over-approximation, then so is the result. + result._retainPredicate = true; + } + return result; } /** @@ -319,12 +323,21 @@ public: * * When this function returns a nonempty optional, it may modify or move from the arguments. * When it returns boost::none the arguments are unchanged. + * + * TODO SERVER-73827 Instead of handling these special cases, just construct a disjunction and + * then simplify; and get rid of this function. */ static ResultType createSameFieldDisjunction(ResultType& leftResult, ResultType& rightResult) { auto& leftReqMap = leftResult->_reqMap; auto& rightReqMap = rightResult->_reqMap; + if (!PSRExpr::isSingletonDisjunction(leftReqMap.getRoot()) || + !PSRExpr::isSingletonDisjunction(rightReqMap.getRoot())) { + return {}; + } + auto leftEntries = leftReqMap.conjuncts(); auto rightEntries = rightReqMap.conjuncts(); + auto leftEntry = leftEntries.begin(); auto rightEntry = rightEntries.begin(); auto& [leftKey, leftReq] = *leftEntry; @@ -369,7 +382,8 @@ public: } // Left and right don't all use the same key. - if (leftReqMap.numLeaves() != 1 || rightReqMap.numLeaves() != 1) { + if (PSRExpr::numLeaves(leftReqMap.getRoot()) != 1 || + PSRExpr::numLeaves(rightReqMap.getRoot()) != 1) { return {}; } // Left and right don't all use the same key, but they both have exactly 1 entry. @@ -483,19 +497,28 @@ public: auto intervalExpr = IntervalReqExpr::makeSingularDNF(IntervalRequirement{ {true /*inclusive*/, Constant::null()}, {true /*inclusive*/, Constant::null()}}); - return {{PartialSchemaRequirements{ - {PartialSchemaKey{make<PathIdentity>()}, - PartialSchemaRequirement{boost::none /*boundProjectionName*/, - std::move(intervalExpr), - false /*isPerfOnly*/}}}}}; + return {{PartialSchemaRequirements{PSRExpr::makeSingularDNF( + PartialSchemaKey{make<PathIdentity>()}, + PartialSchemaRequirement{boost::none /*boundProjectionName*/, + std::move(intervalExpr), + false /*isPerfOnly*/})}}}; } return handleComposition<false /*isMultiplicative*/>(std::move(leftResult), std::move(rightResult)); } + /** + * Prepend a Get or Traverse to each Atom in the argument reqMap. + * + * 'n' specifies a single node to prepend. The child of 'n' is ignored. For example + * if 'n' is 'Get [a] Get [b] Id' then this function prepends 'Get [a]' to 'inputResult'. + * + * Only considers atoms with an empty input binding. Atoms with a nonempty input binding are + * ignored. + */ template <class T> - static ResultType handleGetAndTraverse(const ABT& n, ResultType inputResult) { + static ResultType prependGetOrTraverse(const ABT& n, ResultType inputResult) { if (!inputResult) { return {}; } @@ -503,15 +526,7 @@ public: return {}; } - // New map has keys with appended paths. - PSRExpr::Builder newReqs; - newReqs.pushDisj().pushConj(); - - for (const auto& entry : inputResult->_reqMap.conjuncts()) { - if (entry.first._projectionName) { - return {}; - } - + PSRExpr::visitAnyShape(inputResult->_reqMap.getRoot(), [&](PartialSchemaEntry& entry) { ABT path = entry.first._path; // Updated key path to be now rooted at n, with existing key path as child. 
@@ -519,29 +534,33 @@ public: std::swap(appendedPath.cast<T>()->getPath(), path); std::swap(path, appendedPath); - newReqs.atom(PartialSchemaKey{std::move(path)}, std::move(entry.second)); - } - - inputResult->_reqMap = std::move(*newReqs.finish()); + entry.first._path = path; + }); return inputResult; } ResultType transport(const ABT& n, const PathGet& pathGet, ResultType inputResult) { - return handleGetAndTraverse<PathGet>(n, std::move(inputResult)); + return prependGetOrTraverse<PathGet>(n, std::move(inputResult)); } ResultType transport(const ABT& n, const PathTraverse& pathTraverse, ResultType inputResult) { if (!inputResult) { return {}; } - if (inputResult->_reqMap.numConjuncts() > 1) { - // More than one requirement means we are handling a conjunction inside a traverse. + + if (!PSRExpr::isSingularDNF(inputResult->_reqMap.getRoot())) { + // More than one requirement means we may have a conjunction inside a traverse. // We can change it to a traverse inside a conjunction, but that's an // over-approximation, so we have to keep the original predicate. inputResult->_retainPredicate = true; + + // Note that we could improve this by pushing traverse through disjunction without loss + // of precision. 'Traverse (ComposeA X Y) == ComposeA (Traverse X) (Traverse Y)' because + // Traverse is a disjunction over array elements, so it's ok to re-associate the + // disjunctions. } - auto result = handleGetAndTraverse<PathTraverse>(n, std::move(inputResult)); + auto result = prependGetOrTraverse<PathTraverse>(n, std::move(inputResult)); if (result) { result->_hasTraversed = true; } @@ -585,10 +604,10 @@ public: } return {{PartialSchemaRequirements{ - {PartialSchemaKey{make<PathIdentity>()}, - PartialSchemaRequirement{boost::none /*boundProjectionName*/, - std::move(*unionedInterval), - false /*isPerfOnly*/}}}}}; + PSRExpr::makeSingularDNF(PartialSchemaKey{make<PathIdentity>()}, + PartialSchemaRequirement{boost::none /*boundProjectionName*/, + std::move(*unionedInterval), + false /*isPerfOnly*/})}}}; } ResultType transport(const ABT& n, const PathCompare& pathCompare, ResultType inputResult) { @@ -635,17 +654,18 @@ public: auto intervalExpr = IntervalReqExpr::makeSingularDNF(IntervalRequirement{ {lowBoundInclusive, std::move(lowBound)}, {highBoundInclusive, std::move(highBound)}}); return {{PartialSchemaRequirements{ - {PartialSchemaKey{make<PathIdentity>()}, - PartialSchemaRequirement{boost::none /*boundProjectionName*/, - std::move(intervalExpr), - false /*isPerfOnly*/}}}}}; + PSRExpr::makeSingularDNF(PartialSchemaKey{make<PathIdentity>()}, + PartialSchemaRequirement{boost::none /*boundProjectionName*/, + std::move(intervalExpr), + false /*isPerfOnly*/})}}}; } ResultType transport(const ABT& n, const PathIdentity& pathIdentity) { - return {{PartialSchemaRequirements{{{n}, - {boost::none /*boundProjectionName*/, - IntervalReqExpr::makeSingularDNF(), - false /*isPerfOnly*/}}}}}; + return {{PartialSchemaRequirements{ + PSRExpr::makeSingularDNF(PartialSchemaKey{n}, + PartialSchemaRequirement{boost::none /*boundProjectionName*/, + IntervalReqExpr::makeSingularDNF(), + false /*isPerfOnly*/})}}}; } ResultType transport(const ABT& n, const Constant& c) { @@ -666,11 +686,11 @@ public: if (_pathToInterval) { // If we have a path converter, attempt to convert directly into bounds. 
if (auto conversion = _pathToInterval(n); conversion) { - return {{PartialSchemaRequirements{ - {PartialSchemaKey{make<PathIdentity>()}, - PartialSchemaRequirement{boost::none /*boundProjectionName*/, - std::move(*conversion), - false /*isPerfOnly*/}}}}}; + return {{PartialSchemaRequirements{PSRExpr::makeSingularDNF( + PartialSchemaKey{make<PathIdentity>()}, + PartialSchemaRequirement{boost::none /*boundProjectionName*/, + std::move(*conversion), + false /*isPerfOnly*/})}}}; } } @@ -959,7 +979,7 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs, bool intersectPartialSchemaReq(PartialSchemaRequirements& target, const PartialSchemaRequirements& source) { - // TODO SERVER-74539 Implement intersect for top-level disjunctions. + // TODO SERVER-69026 Consider implementing intersect for top-level disjunctions. if (!PSRExpr::isSingletonDisjunction(target.getRoot()) || !PSRExpr::isSingletonDisjunction(source.getRoot())) { return false; @@ -974,6 +994,22 @@ bool intersectPartialSchemaReq(PartialSchemaRequirements& target, return true; } +PartialSchemaRequirements unionPartialSchemaReq(PartialSchemaRequirements&& left, + PartialSchemaRequirements&& right) { + tassert( + 7453911, "unionPartialSchemaReq assumes DNF", left.getRoot().is<PSRExpr::Disjunction>()); + tassert( + 7453910, "unionPartialSchemaReq assumes DNF", right.getRoot().is<PSRExpr::Disjunction>()); + + PSRExpr::Disjunction& leftDisj = *left.getRoot().cast<PSRExpr::Disjunction>(); + PSRExpr::Disjunction& rightDisj = *right.getRoot().cast<PSRExpr::Disjunction>(); + auto resultNodes = std::move(leftDisj.nodes()); + resultNodes.insert(resultNodes.end(), + std::make_move_iterator(rightDisj.nodes().begin()), + std::make_move_iterator(rightDisj.nodes().end())); + return PSRExpr::make<PSRExpr::Disjunction>(std::move(resultNodes)); +} + std::string encodeIndexKeyName(const size_t indexField) { std::ostringstream os; os << kIndexKeyPrefix << " " << indexField; @@ -1249,8 +1285,9 @@ CandidateIndexes computeCandidateIndexes(PrefixId& prefixId, const ScanDefinition& scanDef, const QueryHints& hints, const ConstFoldFn& constFold) { - // TODO SERVER-69026 or SERVER-74539: Identify candidate indexes for reqs which are not - // singleton disjunctions. + // A candidate index is one that can directly satisfy the SargableNode, without using + // any other indexes. Typically a disjunction would require unioning two different indexes, + // so we bail out if there's a nontrivial disjunction here. if (!PSRExpr::isSingletonDisjunction(reqMap.getRoot())) { return {}; } @@ -1318,64 +1355,75 @@ boost::optional<ScanParams> computeScanParams(PrefixId& prefixId, const PartialSchemaRequirements& reqMap, const ProjectionName& rootProj) { ScanParams result; - - ResidualRequirements::Builder residReqs; - residReqs.pushDisj().pushConj(); - auto& fieldProjMap = result._fieldProjectionMap; + ResidualRequirements::Builder residReqs; - // Expect a DNF with one disjunct; bail out if we have a nontrivial disjunction. - if (!PSRExpr::isSingletonDisjunction(reqMap.getRoot())) { - return {}; - } - + bool invalid = false; size_t entryIndex = 0; - for (const auto& [key, req] : reqMap.conjuncts()) { - if (req.getIsPerfOnly()) { - // Ignore perf only requirements. - continue; - } - if (key._projectionName != rootProj) { - // We are not sitting right above a ScanNode. 
- return {}; - } + residReqs.pushDisj(); + PSRExpr::visitDisjuncts(reqMap.getRoot(), [&](const PSRExpr::Node& disjunct, size_t) { + residReqs.pushConj(); + PSRExpr::visitConjuncts(disjunct, [&](const PSRExpr::Node& conjunct, size_t) { + PSRExpr::visitAtom(conjunct, [&](const PartialSchemaEntry& e) { + if (invalid) { + // Short circuit if we're going to return {} anyway. + return; + } + const auto& [key, req] = e; + if (req.getIsPerfOnly()) { + // Ignore perf only requirements. + return; + } + if (key._projectionName != rootProj) { + // We are not sitting right above a ScanNode. + invalid = true; + return; + } - if (auto pathGet = key._path.cast<PathGet>(); pathGet != nullptr) { - const FieldNameType& fieldName = pathGet->name(); - - // Extract a new requirements path with removed simple paths. - // For example if we have a key Get "a" Traverse Compare = 0 we leave only - // Traverse Compare 0. - if (const auto& boundProjName = req.getBoundProjectionName(); - boundProjName && pathGet->getPath().is<PathIdentity>()) { - const auto [it, insertedInFPM] = - fieldProjMap._fieldProjections.emplace(fieldName, *boundProjName); - - if (!insertedInFPM) { - residReqs.atom(PartialSchemaKey{it->second, make<PathIdentity>()}, - PartialSchemaRequirement{req.getBoundProjectionName(), - req.getIntervals(), - false /*isPerfOnly*/}, - entryIndex); - } else if (!isIntervalReqFullyOpenDNF(req.getIntervals())) { - residReqs.atom(PartialSchemaKey{*boundProjName, make<PathIdentity>()}, - PartialSchemaRequirement{boost::none /*boundProjectionName*/, - req.getIntervals(), - false /*isPerfOnly*/}, - entryIndex); + if (auto pathGet = key._path.cast<PathGet>(); pathGet != nullptr) { + const FieldNameType& fieldName = pathGet->name(); + + // Extract a new requirements path with removed simple paths. + // For example if we have a key Get "a" Traverse Compare = 0 we leave + // only Traverse Compare 0. + if (const auto& boundProjName = req.getBoundProjectionName(); + boundProjName && pathGet->getPath().is<PathIdentity>()) { + const auto [it, insertedInFPM] = + fieldProjMap._fieldProjections.emplace(fieldName, *boundProjName); + + if (!insertedInFPM) { + residReqs.atom(PartialSchemaKey{it->second, make<PathIdentity>()}, + PartialSchemaRequirement{req.getBoundProjectionName(), + req.getIntervals(), + false /*isPerfOnly*/}, + entryIndex); + } else if (!isIntervalReqFullyOpenDNF(req.getIntervals())) { + residReqs.atom( + PartialSchemaKey{*boundProjName, make<PathIdentity>()}, + PartialSchemaRequirement{boost::none /*boundProjectionName*/, + req.getIntervals(), + false /*isPerfOnly*/}, + entryIndex); + } + } else { + const ProjectionName& tempProjName = + getExistingOrTempProjForFieldName(prefixId, fieldName, fieldProjMap); + residReqs.atom( + PartialSchemaKey{tempProjName, pathGet->getPath()}, req, entryIndex); + } + } else { + // Move other conditions into the residual map. + fieldProjMap._rootProjection = rootProj; + residReqs.atom(key, req, entryIndex); } - } else { - const ProjectionName& tempProjName = - getExistingOrTempProjForFieldName(prefixId, fieldName, fieldProjMap); - residReqs.atom(PartialSchemaKey{tempProjName, pathGet->getPath()}, req, entryIndex); - } - } else { - // Move other conditions into the residual map. 
- fieldProjMap._rootProjection = rootProj; - residReqs.atom(key, req, entryIndex); - } - entryIndex++; + entryIndex++; + }); + }); + residReqs.pop(); + }); + if (invalid) { + return {}; } result._residualRequirements = residReqs.finish(); diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h index 8b72a5f2c66..30b01ba9e9b 100644 --- a/src/mongo/db/query/optimizer/utils/utils.h +++ b/src/mongo/db/query/optimizer/utils/utils.h @@ -307,6 +307,9 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs, bool intersectPartialSchemaReq(PartialSchemaRequirements& target, const PartialSchemaRequirements& source); +PartialSchemaRequirements unionPartialSchemaReq(PartialSchemaRequirements&& left, + PartialSchemaRequirements&& right); + /** * Encode an index of an index field as a field name in order to use with a FieldProjectionMap. |