diff options
author | Svilen Mihaylov <svilen.mihaylov@mongodb.com> | 2022-12-08 23:09:31 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-12-08 23:49:11 +0000 |
commit | eacea9f519203e06174e80b962735ec0c7fdda5a (patch) | |
tree | 11b2f0006d3f3da183186d5ea4f03c542e2ecac7 | |
parent | e5ca3d956bf20effd512f8eb0654f88f0e156a4f (diff) | |
download | mongo-eacea9f519203e06174e80b962735ec0c7fdda5a.tar.gz |
SERVER-70639 [CQF] Implement Spool physical node
18 files changed, 696 insertions, 51 deletions
diff --git a/src/mongo/db/exec/sbe/abt/abt_lower.cpp b/src/mongo/db/exec/sbe/abt/abt_lower.cpp index 18348ab9ac8..8d47db3830d 100644 --- a/src/mongo/db/exec/sbe/abt/abt_lower.cpp +++ b/src/mongo/db/exec/sbe/abt/abt_lower.cpp @@ -28,7 +28,6 @@ */ #include "mongo/db/exec/sbe/abt/abt_lower.h" -#include "mongo/db/exec/sbe/stages/bson_scan.h" #include "mongo/db/exec/sbe/stages/co_scan.h" #include "mongo/db/exec/sbe/stages/exchange.h" #include "mongo/db/exec/sbe/stages/filter.h" @@ -42,11 +41,13 @@ #include "mongo/db/exec/sbe/stages/scan.h" #include "mongo/db/exec/sbe/stages/sort.h" #include "mongo/db/exec/sbe/stages/sorted_merge.h" +#include "mongo/db/exec/sbe/stages/spool.h" #include "mongo/db/exec/sbe/stages/union.h" #include "mongo/db/exec/sbe/stages/unique.h" #include "mongo/db/exec/sbe/stages/unwind.h" #include "mongo/db/query/optimizer/utils/utils.h" + namespace mongo::optimizer { static sbe::EExpression::Vector toInlinedVector( @@ -576,6 +577,61 @@ std::unique_ptr<sbe::PlanStage> SBENodeLowering::walk(const UniqueNode& n, return sbe::makeS<sbe::UniqueStage>(std::move(input), std::move(keySlots), planNodeId); } +std::unique_ptr<sbe::PlanStage> SBENodeLowering::walk(const SpoolProducerNode& n, + const ABT& child, + const ABT& filter, + const ABT& binder, + const ABT& refs) { + auto input = generateInternal(child); + + sbe::value::SlotVector vals; + for (const ProjectionName& projectionName : n.binder().names()) { + auto it = _slotMap.find(projectionName); + uassert(6624139, + str::stream() << "undefined variable: " << projectionName, + it != _slotMap.end()); + vals.push_back(it->second); + } + + const PlanNodeId planNodeId = _nodeToGroupPropsMap.at(&n)._planNodeId; + switch (n.getType()) { + case SpoolProducerType::Eager: + return sbe::makeS<sbe::SpoolEagerProducerStage>( + std::move(input), n.getSpoolId(), std::move(vals), planNodeId); + + case SpoolProducerType::Lazy: { + auto expr = SBEExpressionLowering{_env, _slotMap}.optimize(filter); + return sbe::makeS<sbe::SpoolLazyProducerStage>( + std::move(input), n.getSpoolId(), std::move(vals), std::move(expr), planNodeId); + } + } + + MONGO_UNREACHABLE; +} + +std::unique_ptr<sbe::PlanStage> SBENodeLowering::walk(const SpoolConsumerNode& n, + const ABT& binder) { + sbe::value::SlotVector vals; + for (const ProjectionName& projectionName : n.binder().names()) { + auto slot = _slotIdGenerator.generate(); + _slotMap.emplace(projectionName, slot); + vals.push_back(slot); + } + + const PlanNodeId planNodeId = _nodeToGroupPropsMap.at(&n)._planNodeId; + switch (n.getType()) { + case SpoolConsumerType::Stack: + return sbe::makeS<sbe::SpoolConsumerStage<true /*isStack*/>>( + n.getSpoolId(), std::move(vals), planNodeId); + + case SpoolConsumerType::Regular: + return sbe::makeS<sbe::SpoolConsumerStage<false /*isStack*/>>( + n.getSpoolId(), std::move(vals), planNodeId); + } + + MONGO_UNREACHABLE; +} + std::unique_ptr<sbe::PlanStage> SBENodeLowering::walk(const GroupByNode& n, const ABT& child, const ABT& aggBinds, diff --git a/src/mongo/db/exec/sbe/abt/abt_lower.h b/src/mongo/db/exec/sbe/abt/abt_lower.h index bdab4602d4a..ac4c7f49ccf 100644 --- a/src/mongo/db/exec/sbe/abt/abt_lower.h +++ b/src/mongo/db/exec/sbe/abt/abt_lower.h @@ -108,9 +108,11 @@ public: // The default noop transport. template <typename T, typename... Ts> std::unique_ptr<sbe::PlanStage> walk(const T&, Ts&&...) { - if constexpr (std::is_base_of_v<ExclusivelyLogicalNode, T>) { - uasserted(6624238, "A physical plan should not contain exclusively logical nodes."); - } + // We should not be seeing a physical delegator node here. + static_assert(!canBePhysicalNode<T>() || std::is_same_v<MemoPhysicalDelegatorNode, T>, + "Physical nodes need to implement lowering"); + + uasserted(6624238, "Unexpected node type."); return nullptr; } @@ -127,6 +129,13 @@ public: std::unique_ptr<sbe::PlanStage> walk(const UniqueNode& n, const ABT& child, const ABT& refs); + std::unique_ptr<sbe::PlanStage> walk(const SpoolProducerNode& n, + const ABT& child, + const ABT& filter, + const ABT& binder, + const ABT& refs); + std::unique_ptr<sbe::PlanStage> walk(const SpoolConsumerNode& n, const ABT& binder); + std::unique_ptr<sbe::PlanStage> walk(const GroupByNode& n, const ABT& child, const ABT& aggBinds, diff --git a/src/mongo/db/exec/sbe/abt/sbe_abt_test.cpp b/src/mongo/db/exec/sbe/abt/sbe_abt_test.cpp index 25a42fefcee..8758ae83786 100644 --- a/src/mongo/db/exec/sbe/abt/sbe_abt_test.cpp +++ b/src/mongo/db/exec/sbe/abt/sbe_abt_test.cpp @@ -39,6 +39,7 @@ #include "mongo/db/query/optimizer/utils/unit_test_utils.h" #include "mongo/unittest/unittest.h" + namespace mongo::optimizer { namespace { @@ -677,5 +678,174 @@ TEST_F(NodeSBE, RequireRID) { ASSERT_EQ(1, resultSize); } +/** + * This transport is used to populate default values into the NodeToGroupProps map to get around the + * fact that the plan was not obtained from the memo. At this point we are interested only in the + * planNodeIds being distinct. + */ +class PropsTransport { +public: + template <typename T, typename... Ts> + void transport(const T& node, NodeToGroupPropsMap& propMap, Ts&&...) { + if constexpr (std::is_base_of_v<Node, T>) { + propMap.emplace(&node, + NodeProps{_planNodeId++, + {-1, 0} /*groupId*/, + {} /*logicalProps*/, + {} /*physicalProps*/, + boost::none /*ridProjName*/, + CostType::kZero /*cost*/, + CostType::kZero /*localCost*/, + 0.0 /*adjustedCE*/}); + } + } + + void updatePropsMap(const ABT& n, NodeToGroupPropsMap& propMap) { + algebra::transport<false>(n, *this, propMap); + } + +private: + int32_t _planNodeId = 0; +}; + +TEST_F(NodeSBE, SpoolFibonacci) { + using namespace unit_test_abt_literals; + + PrefixId prefixId; + Metadata metadata{{}}; + + // Construct a spool-based recursive plan to compute the first 10 Fibonacci numbers. The main + // plan (first child of the union) sets up the initial conditions (val = 1, prev = 0, and it = + // 1), and the recursive subplan is computing the actual Fibonacci sequence and ensures we + // terminate after 10 numbers. + auto recursion = + NodeBuilder{} + .eval("val", _binary("Add", "valIn"_var, "valIn_prev"_var)) + .eval("val_prev", "valIn"_var) + .eval("it", _binary("Add", "itIn"_var, "1"_cint64)) + .filter(_binary("Lt", "itIn"_var, "10"_cint64)) + .finish(_spoolc("Stack", 1 /*spoolId*/, _varnames("valIn", "valIn_prev", "itIn"))); + + auto tree = NodeBuilder{} + .root("val") + .spoolp("Lazy", 1 /*spoolId*/, _varnames("val", "val_prev", "it"), _cbool(true)) + .un(_varnames("val", "val_prev", "it"), {NodeHolder{std::move(recursion)}}) + .eval("val", "1"_cint64) + .eval("val_prev", "0"_cint64) + .eval("it", "1"_cint64) + .ls(1, 0) + .finish(_coscan()); + + ASSERT_EXPLAIN_V2_AUTO( + "Root []\n" + "| | projections: \n" + "| | val\n" + "| RefBlock: \n" + "| Variable [val]\n" + "SpoolProducer [Lazy, id: 1]\n" + "| | Const [true]\n" + "| BindBlock:\n" + "| [it]\n" + "| Source []\n" + "| [val]\n" + "| Source []\n" + "| [val_prev]\n" + "| Source []\n" + "Union []\n" + "| | BindBlock:\n" + "| | [it]\n" + "| | Source []\n" + "| | [val]\n" + "| | Source []\n" + "| | [val_prev]\n" + "| | Source []\n" + "| Evaluation []\n" + "| | BindBlock:\n" + "| | [val]\n" + "| | BinaryOp [Add]\n" + "| | | Variable [valIn_prev]\n" + "| | Variable [valIn]\n" + "| Evaluation []\n" + "| | BindBlock:\n" + "| | [val_prev]\n" + "| | Variable [valIn]\n" + "| Evaluation []\n" + "| | BindBlock:\n" + "| | [it]\n" + "| | BinaryOp [Add]\n" + "| | | Const [1]\n" + "| | Variable [itIn]\n" + "| Filter []\n" + "| | BinaryOp [Lt]\n" + "| | | Const [10]\n" + "| | Variable [itIn]\n" + "| SpoolConsumer [Stack, id: 1]\n" + "| BindBlock:\n" + "| [itIn]\n" + "| Source []\n" + "| [valIn]\n" + "| Source []\n" + "| [valIn_prev]\n" + "| Source []\n" + "Evaluation []\n" + "| BindBlock:\n" + "| [val]\n" + "| Const [1]\n" + "Evaluation []\n" + "| BindBlock:\n" + "| [val_prev]\n" + "| Const [0]\n" + "Evaluation []\n" + "| BindBlock:\n" + "| [it]\n" + "| Const [1]\n" + "LimitSkip []\n" + "| limitSkip:\n" + "| limit: 1\n" + "| skip: 0\n" + "CoScan []\n", + tree); + + NodeToGroupPropsMap props; + PropsTransport{}.updatePropsMap(tree, props); + + auto env = VariableEnvironment::build(tree); + SlotVarMap map; + boost::optional<sbe::value::SlotId> ridSlot; + sbe::value::SlotIdGenerator ids; + SBENodeLowering g{env, map, ridSlot, ids, metadata, props, false /*randomScan*/}; + auto sbePlan = g.optimize(tree); + ASSERT_EQ(1, map.size()); + + auto opCtx = makeOperationContext(); + sbe::CompileCtx ctx(std::make_unique<sbe::RuntimeEnvironment>()); + sbePlan->prepare(ctx); + + std::vector<sbe::value::SlotAccessor*> accessors; + for (auto& [name, slot] : map) { + accessors.emplace_back(sbePlan->getAccessor(ctx, slot)); + } + + sbePlan->attachToOperationContext(opCtx.get()); + sbePlan->open(false); + + std::vector<int64_t> results; + while (sbePlan->getNext() != sbe::PlanState::IS_EOF) { + const auto [resultTag, resultVal] = accessors.front()->getViewOfValue(); + ASSERT_EQ(sbe::value::TypeTags::NumberInt64, resultTag); + results.push_back(resultVal); + }; + sbePlan->close(); + + // Verify we are getting 10 Fibonacci numbers. + ASSERT_EQ(10, results.size()); + + ASSERT_EQ(1, results.at(0)); + ASSERT_EQ(1, results.at(1)); + for (size_t i = 2; i < 10; i++) { + ASSERT_EQ(results.at(i), results.at(i - 1) + results.at(i - 2)); + } +} + } // namespace } // namespace mongo::optimizer diff --git a/src/mongo/db/query/cost_model/cost_estimator_impl.cpp b/src/mongo/db/query/cost_model/cost_estimator_impl.cpp index 86ea2a4efb1..c4526f27d10 100644 --- a/src/mongo/db/query/cost_model/cost_estimator_impl.cpp +++ b/src/mongo/db/query/cost_model/cost_estimator_impl.cpp @@ -228,6 +228,19 @@ public: return {uniqueCost, _cardinalityEstimate}; } + CostAndCEInternal operator()(const ABT& /*n*/, const SpoolProducerNode& node) { + CostAndCEInternal childResult = deriveChild(node.getChild(), 0); + // TODO: SERVER-71821: Calibration for Spool producer node. + const double cost = _coefficients.getDefaultStartupCost() + childResult._cost; + return {cost, _cardinalityEstimate}; + } + + CostAndCEInternal operator()(const ABT& /*n*/, const SpoolConsumerNode& node) { + // TODO: SERVER-71822: Calibration for Spool consumer node. + const double cost = _coefficients.getDefaultStartupCost(); + return {cost, _cardinalityEstimate}; + } + CostAndCEInternal operator()(const ABT& /*n*/, const CollationNode& node) { CostAndCEInternal childResult = deriveChild(node.getChild(), 0); // TODO: consider RepetitionEstimate since this is a stateful operation. diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp index c7ff34eee42..89b2bfda6d4 100644 --- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp +++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp @@ -266,39 +266,6 @@ struct ReorderDependencies { bool _hasNodeAndChildRef = false; }; -template <class NodeType> -struct DefaultChildAccessor { - const ABT& operator()(const ABT& node) const { - return node.cast<NodeType>()->getChild(); - } - - ABT& operator()(ABT& node) const { - return node.cast<NodeType>()->getChild(); - } -}; - -template <class NodeType> -struct LeftChildAccessor { - const ABT& operator()(const ABT& node) const { - return node.cast<NodeType>()->getLeftChild(); - } - - ABT& operator()(ABT& node) const { - return node.cast<NodeType>()->getLeftChild(); - } -}; - -template <class NodeType> -struct RightChildAccessor { - const ABT& operator()(const ABT& node) const { - return node.cast<NodeType>()->getRightChild(); - } - - ABT& operator()(ABT& node) const { - return node.cast<NodeType>()->getRightChild(); - } -}; - template <class AboveType, class BelowType, template <class> class BelowChildAccessor = DefaultChildAccessor> diff --git a/src/mongo/db/query/optimizer/defs.h b/src/mongo/db/query/optimizer/defs.h index 61f5e587e13..48be0041f33 100644 --- a/src/mongo/db/query/optimizer/defs.h +++ b/src/mongo/db/query/optimizer/defs.h @@ -234,7 +234,7 @@ struct CostAndCE { MAKE_PRINTABLE_ENUM(CollationOp, COLLATIONOP_OPNAMES); MAKE_PRINTABLE_ENUM_STRING_ARRAY(CollationOpEnum, CollationOp, COLLATIONOP_OPNAMES); -#undef PATHSYNTAX_OPNAMES +#undef COLLATIONOP_OPNAMES using ProjectionCollationEntry = std::pair<ProjectionName, CollationOp>; using ProjectionCollationSpec = std::vector<ProjectionCollationEntry>; diff --git a/src/mongo/db/query/optimizer/explain.cpp b/src/mongo/db/query/optimizer/explain.cpp index 3d6923af7f2..71844701274 100644 --- a/src/mongo/db/query/optimizer/explain.cpp +++ b/src/mongo/db/query/optimizer/explain.cpp @@ -1724,6 +1724,52 @@ public: } ExplainPrinter transport(const ABT& n, + const SpoolProducerNode& node, + ExplainPrinter childResult, + ExplainPrinter filterResult, + ExplainPrinter bindResult, + ExplainPrinter refsResult) { + ExplainPrinter printer("SpoolProducer"); + maybePrintProps(printer, node); + + printer.separator(" [") + .fieldName("type", ExplainVersion::V3) + .print(SpoolProducerTypeEnum::toString[static_cast<int>(node.getType())]) + .separator(", ") + .fieldName("id") + .print(node.getSpoolId()) + .separator("]"); + + nodeCEPropsPrint(printer, n, node); + printer.setChildCount(3); + printer.fieldName("filter", ExplainVersion::V3).print(filterResult); + printer.fieldName("bindings", ExplainVersion::V3).print(bindResult); + printer.fieldName("child", ExplainVersion::V3).print(childResult); + + return printer; + } + + ExplainPrinter transport(const ABT& n, + const SpoolConsumerNode& node, + ExplainPrinter bindResult) { + ExplainPrinter printer("SpoolConsumer"); + maybePrintProps(printer, node); + + printer.separator(" [") + .fieldName("type", ExplainVersion::V3) + .print(SpoolConsumerTypeEnum::toString[static_cast<int>(node.getType())]) + .separator(", ") + .fieldName("id") + .print(node.getSpoolId()) + .separator("]"); + + nodeCEPropsPrint(printer, n, node); + printer.fieldName("bindings", ExplainVersion::V3).print(bindResult); + + return printer; + } + + ExplainPrinter transport(const ABT& n, const CollationNode& node, ExplainPrinter childResult, ExplainPrinter refsResult) { diff --git a/src/mongo/db/query/optimizer/node.cpp b/src/mongo/db/query/optimizer/node.cpp index 3ad726fa2b7..f61d7e826f6 100644 --- a/src/mongo/db/query/optimizer/node.cpp +++ b/src/mongo/db/query/optimizer/node.cpp @@ -828,6 +828,71 @@ const ABT& UniqueNode::getChild() const { return get<0>(); } +SpoolProducerNode::SpoolProducerNode(const SpoolProducerType type, + const int64_t spoolId, + ProjectionNameVector projections, + ABT filter, + ABT child) + : Base(std::move(child), + std::move(filter), + buildSimpleBinder(projections), + make<References>(projections)), + _type(type), + _spoolId(spoolId) { + assertNodeSort(getChild()); + assertExprSort(getFilter()); + tassert( + 6624155, "Spool producer must have a non-empty projection list", !binder().names().empty()); + tassert(6624120, + "Invalid combination of spool producer type and spool filter", + _type == SpoolProducerType::Lazy || filter == Constant::boolean(true)); +} + +bool SpoolProducerNode::operator==(const SpoolProducerNode& other) const { + return _type == other._type && _spoolId == other._spoolId && getFilter() == other.getFilter() && + binder() == other.binder(); +} + +SpoolProducerType SpoolProducerNode::getType() const { + return _type; +} + +int64_t SpoolProducerNode::getSpoolId() const { + return _spoolId; +} + +const ABT& SpoolProducerNode::getFilter() const { + return get<1>(); +} + +const ABT& SpoolProducerNode::getChild() const { + return get<0>(); +} + +ABT& SpoolProducerNode::getChild() { + return get<0>(); +} + +SpoolConsumerNode::SpoolConsumerNode(const SpoolConsumerType type, + const int64_t spoolId, + ProjectionNameVector projections) + : Base(buildSimpleBinder(projections)), _type(type), _spoolId(spoolId) { + tassert( + 6624125, "Spool consumer must have a non-empty projection list", !binder().names().empty()); +} + +bool SpoolConsumerNode::operator==(const SpoolConsumerNode& other) const { + return _type == other._type && _spoolId == other._spoolId && binder() == other.binder(); +} + +SpoolConsumerType SpoolConsumerNode::getType() const { + return _type; +} + +int64_t SpoolConsumerNode::getSpoolId() const { + return _spoolId; +} + CollationNode::CollationNode(properties::CollationRequirement property, ABT child) : Base(std::move(child), buildReferences(extractReferencedColumns(properties::makePhysProps(property)))), diff --git a/src/mongo/db/query/optimizer/node.h b/src/mongo/db/query/optimizer/node.h index 614d51f20c0..5f77e232566 100644 --- a/src/mongo/db/query/optimizer/node.h +++ b/src/mongo/db/query/optimizer/node.h @@ -722,7 +722,7 @@ private: MAKE_PRINTABLE_ENUM(GroupNodeType, GROUPNODETYPE_OPNAMES); MAKE_PRINTABLE_ENUM_STRING_ARRAY(GroupNodeTypeEnum, GroupNodeType, GROUPNODETYPE_OPNAMES); -#undef PATHSYNTAX_OPNAMES +#undef GROUPNODETYPE_OPNAMES /** * Group-by node. @@ -859,6 +859,105 @@ private: ProjectionNameVector _projections; }; +#define SPOOL_PRODUCER_TYPE_OPNAMES(F) \ + F(Eager) \ + F(Lazy) + +MAKE_PRINTABLE_ENUM(SpoolProducerType, SPOOL_PRODUCER_TYPE_OPNAMES); +MAKE_PRINTABLE_ENUM_STRING_ARRAY(SpoolProducerTypeEnum, + SpoolProducerType, + SPOOL_PRODUCER_TYPE_OPNAMES); +#undef SPOOL_PRODUCER_TYPE_OPNAMES + +/** + * Spool producer node. + * + * This is a physical node. It buffers the values coming from its child in a shared buffer indexed + * by the "spoolId" field. This buffer in turn is accessed via a corresponding SpoolConsumer node. + * It can be used to implement recursive plans. + * + * We have two different modes of operation: + * 1. Eager: on startup it will read and store the entire input from its child into the buffer + * identified by the "spoolId" parameter. Then when asked for more data, it will return data from + * the buffer. + * 2. Lazy: by contrast to "eager", it will request each value from its child incrementally + * and store it into the shared buffer, and immediately propagate it to the parent. + */ +class SpoolProducerNode final : public ABTOpFixedArity<4>, public ExclusivelyPhysicalNode { + using Base = ABTOpFixedArity<4>; + +public: + SpoolProducerNode(SpoolProducerType type, + int64_t spoolId, + ProjectionNameVector projections, + ABT filter, + ABT child); + + bool operator==(const SpoolProducerNode& other) const; + + const ExpressionBinder& binder() const { + const ABT& result = get<2>(); + tassert(6624126, "Invalid binder type", result.is<ExpressionBinder>()); + return *result.cast<ExpressionBinder>(); + } + + SpoolProducerType getType() const; + int64_t getSpoolId() const; + + const ABT& getFilter() const; + + const ABT& getChild() const; + ABT& getChild(); + +private: + const SpoolProducerType _type; + const int64_t _spoolId; +}; + +#define SPOOL_CONSUMER_TYPE_OPNAMES(F) \ + F(Stack) \ + F(Regular) + +MAKE_PRINTABLE_ENUM(SpoolConsumerType, SPOOL_CONSUMER_TYPE_OPNAMES); +MAKE_PRINTABLE_ENUM_STRING_ARRAY(SpoolConsumerTypeEnum, + SpoolConsumerType, + SPOOL_CONSUMER_TYPE_OPNAMES); +#undef SPOOL_CONSUMER_TYPE_OPNAMES + +/** + * Spool consumer node. + * + * This is a physical node. It delivers incoming values from a shared buffer (indexed by "spoolId"). + * This shared buffer is populated by a corresponding SpoolProducer node. + * + * It has two modes of operation: + * 1. Stack: the consumer removes each value from the buffer as it is returned. The values are + * returned in reverse order (hence "stack") of insertion in the shared buffer. + * 2. Regular: the node will return the values in the same order in which they were inserted. The + * values are not removed from the buffer. + */ +class SpoolConsumerNode final : public ABTOpFixedArity<1>, public ExclusivelyPhysicalNode { + using Base = ABTOpFixedArity<1>; + +public: + SpoolConsumerNode(SpoolConsumerType type, int64_t spoolId, ProjectionNameVector projections); + + bool operator==(const SpoolConsumerNode& other) const; + + const ExpressionBinder& binder() const { + const ABT& result = get<0>(); + tassert(6624135, "Invalid binder type", result.is<ExpressionBinder>()); + return *result.cast<ExpressionBinder>(); + } + + SpoolConsumerType getType() const; + int64_t getSpoolId() const; + +private: + const SpoolConsumerType _type; + const int64_t _spoolId; +}; + /** * Collation node. * This node is both logical and physical. diff --git a/src/mongo/db/query/optimizer/reference_tracker.cpp b/src/mongo/db/query/optimizer/reference_tracker.cpp index b56bf7f28cf..c44cbff9191 100644 --- a/src/mongo/db/query/optimizer/reference_tracker.cpp +++ b/src/mongo/db/query/optimizer/reference_tracker.cpp @@ -656,7 +656,9 @@ struct Collector { // Manually copy and resolve references of specific child. We do this manually because // each Variable must be resolved by the appropriate child's definition. for (const auto& name : names) { - tassert(6624031, "Union projection does not exist", u.defs.count(name) != 0); + tassert(6624031, + str::stream() << "Union projection does not exist: " << name, + u.defs.count(name) != 0); u.useMap.emplace(&refsResult.freeVars[name][counter].get(), u.defs[name]); } u.defs.clear(); @@ -769,6 +771,40 @@ struct Collector { return result; } + CollectedInfo transport(const ABT& n, + const SpoolProducerNode& node, + CollectedInfo childResult, + CollectedInfo filterResult, + CollectedInfo bindResult, + CollectedInfo refsResult) { + CollectedInfo result{}; + + result.merge(std::move(refsResult)); + result.merge(std::move(childResult)); + + const auto& binder = node.binder(); + for (size_t i = 0; i < binder.names().size(); i++) { + const auto& name = binder.names().at(i); + tassert(6624138, + str::stream() << "Spool projection does not exist: " << name, + result.defs.count(name) != 0); + + // Redefine projection. + result.defs[name] = Definition{n.ref(), binder.exprs()[i].ref()}; + } + + result.mergeNoDefs(std::move(bindResult)); + result.mergeNoDefs(std::move(filterResult)); + + result.nodeDefs[&node] = result.defs; + + return result; + } + + CollectedInfo transport(const ABT& n, const SpoolConsumerNode& node, CollectedInfo bindResult) { + return collectForScan(n, node, node.binder(), {}); + } + CollectedInfo collect(const ABT& n) { return algebra::transport<true>(n, *this); } diff --git a/src/mongo/db/query/optimizer/syntax/expr.cpp b/src/mongo/db/query/optimizer/syntax/expr.cpp index 0fed04069fa..bc50cb4a4df 100644 --- a/src/mongo/db/query/optimizer/syntax/expr.cpp +++ b/src/mongo/db/query/optimizer/syntax/expr.cpp @@ -167,5 +167,13 @@ Decimal128 Constant::getValueDecimal() const { return bitcastTo<Decimal128>(_val); } +bool Constant::isValueBool() const { + return _tag == TypeTags::Boolean; +} + +bool Constant::getValueBool() const { + uassert(6624356, "Constant value type is not bool", isValueBool()); + return bitcastTo<bool>(_val); +} } // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/syntax/expr.h b/src/mongo/db/query/optimizer/syntax/expr.h index ba89404dbac..7fed5b15734 100644 --- a/src/mongo/db/query/optimizer/syntax/expr.h +++ b/src/mongo/db/query/optimizer/syntax/expr.h @@ -98,6 +98,9 @@ public: bool isValueDecimal() const; Decimal128 getValueDecimal() const; + bool isValueBool() const; + bool getValueBool() const; + bool isNumber() const { return sbe::value::isNumber(_tag); } diff --git a/src/mongo/db/query/optimizer/syntax/syntax.h b/src/mongo/db/query/optimizer/syntax/syntax.h index e3a649c7106..9cafb04d549 100644 --- a/src/mongo/db/query/optimizer/syntax/syntax.h +++ b/src/mongo/db/query/optimizer/syntax/syntax.h @@ -97,6 +97,8 @@ using ABT = algebra::PolyValue<Blackhole, GroupByNode, UnwindNode, UniqueNode, + SpoolProducerNode, + SpoolConsumerNode, CollationNode, LimitSkipNode, ExchangeNode, diff --git a/src/mongo/db/query/optimizer/syntax/syntax_fwd_declare.h b/src/mongo/db/query/optimizer/syntax/syntax_fwd_declare.h index b30b06bdfdf..80f97788698 100644 --- a/src/mongo/db/query/optimizer/syntax/syntax_fwd_declare.h +++ b/src/mongo/db/query/optimizer/syntax/syntax_fwd_declare.h @@ -91,6 +91,8 @@ class UnionNode; class GroupByNode; class UnwindNode; class UniqueNode; +class SpoolProducerNode; +class SpoolConsumerNode; class CollationNode; class LimitSkipNode; class ExchangeNode; diff --git a/src/mongo/db/query/optimizer/unit_test_infra_test.cpp b/src/mongo/db/query/optimizer/unit_test_infra_test.cpp index 6428096c0f9..d1c2e95eb85 100644 --- a/src/mongo/db/query/optimizer/unit_test_infra_test.cpp +++ b/src/mongo/db/query/optimizer/unit_test_infra_test.cpp @@ -263,6 +263,8 @@ TEST(TestInfra, GenerateABTLiterals) { NodeBuilder{} \ .root("pc") \ .collation({"pa:1", "pc:-1"}) \ + .ls(1, 0) \ + .spoolp("Lazy", 1, _varnames("pa"), _cbool(true)) \ .gb(_varnames("pa"), _varnames("pc"), {"pb"_var}) \ .filter(_evalf(_cmp("Gt", "1"_cint64), "pb"_var)) \ .eval("pb", _evalp(_get("b", _id()), "root"_var)) \ diff --git a/src/mongo/db/query/optimizer/utils/reftracker_utils.cpp b/src/mongo/db/query/optimizer/utils/reftracker_utils.cpp index cec8c81412f..d148306b818 100644 --- a/src/mongo/db/query/optimizer/utils/reftracker_utils.cpp +++ b/src/mongo/db/query/optimizer/utils/reftracker_utils.cpp @@ -172,6 +172,18 @@ public: return extractFromABT(refs); } + ProjectionNameSet walk(const SpoolProducerNode& /*node*/, + const ABT& /*child*/, + const ABT& /*filter*/, + const ABT& /*binds*/, + const ABT& refs) { + return extractFromABT(refs); + } + + ProjectionNameSet walk(const SpoolConsumerNode& /*node*/, const ABT& /*binds*/) { + return {}; + } + ProjectionNameSet walk(const CollationNode& /*node*/, const ABT& /*child*/, const ABT& refs) { return extractFromABT(refs); } diff --git a/src/mongo/db/query/optimizer/utils/unit_test_abt_literals.h b/src/mongo/db/query/optimizer/utils/unit_test_abt_literals.h index f435303ec42..607765d46ce 100644 --- a/src/mongo/db/query/optimizer/utils/unit_test_abt_literals.h +++ b/src/mongo/db/query/optimizer/utils/unit_test_abt_literals.h @@ -33,6 +33,7 @@ #include <typeinfo> #include "mongo/db/query/optimizer/node.h" +#include "mongo/db/query/optimizer/utils/utils.h" namespace mongo::optimizer::unit_test_abt_literals { @@ -66,16 +67,20 @@ using NodeHolder = ABTHolder<NodeTag>; /** * ABT Expressions */ -inline Operations getOpByName(StringData str) { - for (size_t i = 0; i < sizeof(OperationsEnum::toString) / sizeof(OperationsEnum::toString[0]); - i++) { - if (str == OperationsEnum::toString[i]) { - return static_cast<Operations>(i); +template <class T, class T1> +inline T getEnumByName(StringData str, const T1& toStr) { + for (size_t i = 0; i < sizeof(toStr) / sizeof(toStr[0]); i++) { + if (str == toStr[i]) { + return static_cast<T>(i); } } MONGO_UNREACHABLE; } +inline Operations getOpByName(StringData str) { + return getEnumByName<Operations>(str, OperationsEnum::toString); +} + template <class T> inline ABTVector holdersToABTs(T holders) { ABTVector v; @@ -105,6 +110,11 @@ inline auto operator"" _cdouble(const char* c, size_t len) { return ExprHolder{Constant::fromDouble(std::stod({c, len}))}; } +// Boolean constant. +inline auto _cbool(const bool val) { + return ExprHolder{Constant::boolean(val)}; +} + // Variable. inline auto operator"" _var(const char* c, size_t len) { return ExprHolder{make<Variable>(ProjectionName{{c, len}})}; @@ -236,6 +246,10 @@ inline auto _scan(ProjectionName pn, std::string scanDefName) { return NodeHolder{make<ScanNode>(std::move(pn), std::move(scanDefName))}; } +inline auto _coscan() { + return NodeHolder{make<CoScanNode>()}; +} + inline auto _filter(ExprHolder expr, NodeHolder input) { return NodeHolder{make<FilterNode>(std::move(expr._n), std::move(input._n))}; } @@ -283,6 +297,31 @@ inline auto _union(ProjectionNameVector pns, std::vector<NodeHolder> inputs) { return NodeHolder{make<UnionNode>(std::move(pns), holdersToABTs(std::move(inputs)))}; } +inline auto _ls(const int64_t limit, const int64_t skip, NodeHolder input) { + return NodeHolder{ + make<LimitSkipNode>(properties::LimitSkipRequirement{limit, skip}, std::move(input._n))}; +} + +inline auto _spoolp(StringData type, + int64_t spoolId, + ProjectionNameVector pns, + ExprHolder filter, + NodeHolder child) { + return NodeHolder{make<SpoolProducerNode>( + getEnumByName<SpoolProducerType>(type, SpoolProducerTypeEnum::toString), + spoolId, + std::move(pns), + std::move(filter._n), + std::move(child._n))}; +} + +inline auto _spoolc(StringData type, int64_t spoolId, ProjectionNameVector pns) { + return NodeHolder{make<SpoolConsumerNode>( + getEnumByName<SpoolConsumerType>(type, SpoolConsumerTypeEnum::toString), + spoolId, + std::move(pns))}; +} + /** * Note the root returns an ABT instead of a holder. */ @@ -333,6 +372,25 @@ public: return advanceChildPtr<CollationNode>(_collation(std::move(spec), makeStub())); } + // This first input is stubbed. + NodeBuilder& un(ProjectionNameVector pns, std::vector<NodeHolder> additionalInputs) { + additionalInputs.insert(additionalInputs.begin(), makeStub()); + return advanceChildPtr<UnionNode, FirstChildAccessor<UnionNode>>( + _union(std::move(pns), std::move(additionalInputs))); + } + + NodeBuilder& ls(const int64_t limit, const int64_t skip) { + return advanceChildPtr<LimitSkipNode>(_ls(limit, skip, makeStub())); + } + + NodeBuilder& spoolp(StringData type, + int64_t spoolId, + ProjectionNameVector pns, + ExprHolder filter) { + return advanceChildPtr<SpoolProducerNode>( + _spoolp(type, spoolId, std::move(pns), std::move(filter), makeStub())); + } + template <typename... Ts> NodeBuilder& root(Ts&&... pack) { return advanceChildPtr<RootNode>({_root(std::forward<Ts>(pack)...)(makeStub())}); @@ -344,11 +402,11 @@ private: return {make<ValueScanNode>(ProjectionNameVector{}, boost::none)}; } - template <class T> + template <class T, class Accessor = DefaultChildAccessor<T>> NodeBuilder& advanceChildPtr(NodeHolder holder) { invariant(_prevChildPtr); *_prevChildPtr = std::move(holder._n); - _prevChildPtr = &_prevChildPtr->cast<T>()->getChild(); + _prevChildPtr = &Accessor()(*_prevChildPtr); return *this; } @@ -412,6 +470,10 @@ public: * ABT Expressions. */ std::string transport(const Constant& expr) { + if (expr.isValueBool()) { + return str::stream() << "_cbool(" << (expr.getValueBool() ? "true" : "false") << ")"; + } + str::stream out; out << "\"" << expr.get() << "\""; @@ -512,20 +574,34 @@ public: * ABT Nodes. */ std::string transport(const ScanNode& node, std::string /*bindResult*/) { - return str::stream() << ".finish(_scan(\"" << node.getProjectionName() << "\", \"" - << node.getScanDefName() << "\"))"; + return finish(str::stream() << "_scan(\"" << node.getProjectionName() << "\", \"" + << node.getScanDefName() << "\")"); + } + + std::string transport(const CoScanNode& node) { + return finish("_coscan()"); + } + + std::string transport(const SpoolConsumerNode& node, std::string /*bindResult*/) { + str::stream os; + os << "_spoolc(\"" << SpoolConsumerTypeEnum::toString[static_cast<int>(node.getType())] + << "\"" + << ", " << node.getSpoolId() << ", _varnames("; + printProjNames(os, node.binder().names()); + os << "))"; + return finish(os); } std::string transport(const FilterNode& node, std::string childResult, std::string filterResult) { - return str::stream() << ".filter(" << explain(node.getFilter()) << ")" << _nodeSeparator - << childResult; + return str::stream() << ".filter(" << filterResult << ")" << _nodeSeparator << childResult; } std::string transport(const EvaluationNode& node, std::string childResult, std::string projResult) { + // We explain the projection directly to avoid explaining the binder. return str::stream() << ".eval(\"" << node.getProjectionName() << "\", " << explain(node.getProjection()) << ")" << _nodeSeparator << childResult; @@ -589,6 +665,25 @@ public: return os << "})" << _nodeSeparator << childResult; } + std::string transport(const LimitSkipNode& node, std::string childResult) { + return str::stream() << ".ls(" << node.getProperty().getLimit() << ", " + << node.getProperty().getSkip() << ")" << _nodeSeparator + << childResult; + } + + std::string transport(const SpoolProducerNode& node, + std::string childResult, + std::string filterResult, + std::string /*bindResult*/, + std::string /*refsResult*/) { + str::stream os; + os << ".spoolp(\"" << SpoolProducerTypeEnum::toString[static_cast<int>(node.getType())] + << "\"" + << ", " << node.getSpoolId() << ", _varnames("; + printProjNames(os, node.binder().names()); + return os << "), " << filterResult << ")" << _nodeSeparator << childResult; + } + std::string transport(const RootNode& node, std::string childResult, std::string refsResult) { str::stream os; os << ".root("; @@ -619,6 +714,10 @@ private: } } + std::string finish(std::string nullaryNode) { + return str::stream() << ".finish(" << nullaryNode << ")"; + } + const std::string _nodeSeparator; }; diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h index f395507757d..6eefb07cc24 100644 --- a/src/mongo/db/query/optimizer/utils/utils.h +++ b/src/mongo/db/query/optimizer/utils/utils.h @@ -113,6 +113,62 @@ inline void maybeComposePaths(ABTVector& paths) { } /** + * Used to access and manipulate the child of a unary node. + */ +template <class NodeType> +struct DefaultChildAccessor { + const ABT& operator()(const ABT& node) const { + return node.cast<NodeType>()->getChild(); + } + + ABT& operator()(ABT& node) const { + return node.cast<NodeType>()->getChild(); + } +}; + +/** + * Used to access and manipulate the left child of a binary node. + */ +template <class NodeType> +struct LeftChildAccessor { + const ABT& operator()(const ABT& node) const { + return node.cast<NodeType>()->getLeftChild(); + } + + ABT& operator()(ABT& node) const { + return node.cast<NodeType>()->getLeftChild(); + } +}; + +/** + * Used to access and manipulate the right child of a binary node. + */ +template <class NodeType> +struct RightChildAccessor { + const ABT& operator()(const ABT& node) const { + return node.cast<NodeType>()->getRightChild(); + } + + ABT& operator()(ABT& node) const { + return node.cast<NodeType>()->getRightChild(); + } +}; + +/** + * Used to access and manipulate the first child of a n-ary node. + */ +template <class NodeType> +struct FirstChildAccessor { + const ABT& operator()(const ABT& node) const { + return node.cast<NodeType>()->nodes().front(); + } + + ABT& operator()(ABT& node) { + return node.cast<NodeType>()->nodes().front(); + } +}; + +/** * Used to vend out fresh projection names. */ class PrefixId { |