diff options
author | Hana Pearlman <hana.pearlman@mongodb.com> | 2022-08-29 16:26:59 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-29 17:37:11 +0000 |
commit | fdefb32e8d48c01c8d0c882938288963c2062008 (patch) | |
tree | 0a63dc4c08960c6eb7b3a7e458c0bc2b4b35cd6f /src/mongo/db/query/optimizer | |
parent | cf84bec54627ba1efb6aebd829f6b3d11aaf112e (diff) | |
download | mongo-fdefb32e8d48c01c8d0c882938288963c2062008.tar.gz |
SERVER-66845: Comment and polish ABT operator types
Diffstat (limited to 'src/mongo/db/query/optimizer')
-rw-r--r-- | src/mongo/db/query/optimizer/cascades/memo.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/node.cpp | 66 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/node.h | 180 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/syntax/expr.h | 102 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/syntax/path.h | 132 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/syntax/syntax.h | 55 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/syntax/syntax_fwd_declare.h | 3 |
7 files changed, 330 insertions, 210 deletions
diff --git a/src/mongo/db/query/optimizer/cascades/memo.cpp b/src/mongo/db/query/optimizer/cascades/memo.cpp index a97fccb3088..c702175bd4e 100644 --- a/src/mongo/db/query/optimizer/cascades/memo.cpp +++ b/src/mongo/db/query/optimizer/cascades/memo.cpp @@ -627,7 +627,7 @@ GroupIdType Memo::addGroup(ProjectionNameSet projections) { std::pair<MemoLogicalNodeId, bool> Memo::addNode(GroupIdType groupId, ABT n, LogicalRewriteType rule) { - uassert(6624052, "Attempting to insert a physical node", !n.is<PhysicalNode>()); + uassert(6624052, "Attempting to insert a physical node", !n.is<ExclusivelyPhysicalNode>()); uassert(6624053, "Attempting to insert a logical delegator node", !n.is<MemoLogicalDelegatorNode>()); diff --git a/src/mongo/db/query/optimizer/node.cpp b/src/mongo/db/query/optimizer/node.cpp index 3f0cc0d5203..859e45ff6f9 100644 --- a/src/mongo/db/query/optimizer/node.cpp +++ b/src/mongo/db/query/optimizer/node.cpp @@ -47,6 +47,10 @@ static ABT buildSimpleBinder(const ProjectionNameVector& names) { return make<ExpressionBinder>(names, std::move(sources)); } +/** + * Builds References from the provided projection names. Equality of References is sensitive + * to order, so the projections are sorted first. + */ static ABT buildReferences(const ProjectionNameSet& projections) { ABTVector variables; ProjectionNameOrderedSet ordered = convertToOrderedSet(projections); @@ -123,19 +127,24 @@ ValueScanNode::ValueScanNode(ProjectionNameVector projections) ValueScanNode::ValueScanNode(ProjectionNameVector projections, ABT valueArray) : Base(buildSimpleBinder(std::move(projections))), _valueArray(std::move(valueArray)) { const auto constPtr = _valueArray.cast<Constant>(); - uassert(6624081, "Expected a constant", constPtr != nullptr); + tassert(6624081, "ValueScan must be intialized with a constant", constPtr != nullptr); const auto [tag, val] = constPtr->get(); - uassert(6624082, "Expected an array constant.", tag == sbe::value::TypeTags::Array); + tassert( + 6624082, "ValueScan must be intialized with an array", tag == sbe::value::TypeTags::Array); const auto arr = sbe::value::getArrayView(val); _arraySize = arr->size(); const size_t projectionCount = binder().names().size(); for (size_t i = 0; i < _arraySize; i++) { const auto [tag1, val1] = arr->getAt(i); - uassert(6624083, "Expected an array element.", tag1 == sbe::value::TypeTags::Array); + tassert(6624083, + "ValueScan must be intialized with an array", + tag1 == sbe::value::TypeTags::Array); const size_t innerSize = sbe::value::getArrayView(val1)->size(); - uassert(6624084, "Invalid array size.", innerSize == projectionCount); + tassert(6624084, + "Element of array must have one entry per projection", + innerSize == projectionCount); } } @@ -339,25 +348,30 @@ SargableNode::SargableNode(PartialSchemaRequirements reqMap, _scanParams(std::move(scanParams)), _target(target) { assertNodeSort(getChild()); - uassert(6624085, "Empty requirements map", !_reqMap.empty()); - // We currently use a 64-bit mask when splitting into left and right requirements. - uassert(6624086, "Requirements map too large", _reqMap.size() < 64); + tassert(6624085, "SargableNode requires at least one predicate", !_reqMap.empty()); + tassert(6624086, + str::stream() << "SargableNode has too many predicates: " << _reqMap.size(), + _reqMap.size() < kMaxPartialSchemaRequirements); // Assert merged map does not contain duplicate bound projections. ProjectionNameSet boundsProjectionNameSet; for (const auto& entry : _reqMap) { - if (entry.second.hasBoundProjectionName() && - !boundsProjectionNameSet.insert(entry.second.getBoundProjectionName()).second) { - uasserted(6624087, "Duplicate bound projection"); + if (entry.second.hasBoundProjectionName()) { + const bool inserted = + boundsProjectionNameSet.insert(entry.second.getBoundProjectionName()).second; + tassert(6624087, + str::stream() << "SargableNode has duplicate bound projection: " + << entry.second.getBoundProjectionName(), + inserted); } } // Assert there are no references to internally bound projections. for (const auto& entry : _reqMap) { - if (boundsProjectionNameSet.find(entry.first._projectionName) != - boundsProjectionNameSet.cend()) { - uasserted(6624088, "We are binding to an internal projection"); - } + tassert(6624088, + "SargableNode cannot reference an internally bound projection", + boundsProjectionNameSet.find(entry.first._projectionName) == + boundsProjectionNameSet.cend()); } } @@ -454,8 +468,9 @@ HashJoinNode::HashJoinNode(JoinType joinType, _joinType(joinType), _leftKeys(std::move(leftKeys)), _rightKeys(std::move(rightKeys)) { - uassert( - 6624089, "Invalid key sizes", !_leftKeys.empty() && _leftKeys.size() == _rightKeys.size()); + tassert(6624089, + "Mismatched number of left and right join keys", + !_leftKeys.empty() && _leftKeys.size() == _rightKeys.size()); assertNodeSort(getLeftChild()); assertNodeSort(getRightChild()); } @@ -505,9 +520,11 @@ MergeJoinNode::MergeJoinNode(ProjectionNameVector leftKeys, _collation(std::move(collation)), _leftKeys(std::move(leftKeys)), _rightKeys(std::move(rightKeys)) { - uassert( - 6624090, "Invalid key sizes", !_leftKeys.empty() && _leftKeys.size() == _rightKeys.size()); - uassert(6624091, "Invalid collation size", _collation.size() == _leftKeys.size()); + tassert(6624090, + "Mismatched number of left and right join keys", + !_leftKeys.empty() && _leftKeys.size() == _rightKeys.size()); + tassert( + 6624091, "Mismatched collation and join key size", _collation.size() == _leftKeys.size()); assertNodeSort(getLeftChild()); assertNodeSort(getRightChild()); } @@ -567,7 +584,8 @@ UnionNode::UnionNode(ProjectionNameVector unionProjectionNames, ABTVector childr : Base(std::move(children), buildSimpleBinder(unionProjectionNames), buildUnionReferences(unionProjectionNames, children.size())) { - uassert(6624007, "Empty union", !unionProjectionNames.empty()); + tassert( + 6624007, "UnionNode must have a non-empty projection list", !unionProjectionNames.empty()); for (auto& n : nodes()) { assertNodeSort(n); @@ -600,8 +618,8 @@ GroupByNode::GroupByNode(ProjectionNameVector groupByProjectionNames, make<References>(groupByProjectionNames)), _type(type) { assertNodeSort(getChild()); - uassert(6624300, - "Mismatched number of agg expressions and names", + tassert(6624300, + "Mismatched number of agg expressions and projection names", getAggregationExpressions().size() == getAggregationProjectionNames().size()); } @@ -660,7 +678,7 @@ UniqueNode::UniqueNode(ProjectionNameVector projections, ABT child) : Base(std::move(child), make<References>(ProjectionNameVector{projections})), _projections(std::move(projections)) { assertNodeSort(getChild()); - uassert(6624092, "Empty projections", !_projections.empty()); + tassert(6624092, "UniqueNode must have a non-empty projection list", !_projections.empty()); } bool UniqueNode::operator==(const UniqueNode& other) const { @@ -731,7 +749,7 @@ ExchangeNode::ExchangeNode(const properties::DistributionRequirement distributio : Base(std::move(child), buildReferences(distribution.getAffectedProjectionNames())), _distribution(std::move(distribution)) { assertNodeSort(getChild()); - uassert(6624008, + tassert(6624008, "Cannot exchange towards an unknown distribution", distribution.getDistributionAndProjections()._type != DistributionType::UnknownPartitioning); diff --git a/src/mongo/db/query/optimizer/node.h b/src/mongo/db/query/optimizer/node.h index ded879d932a..22b1e956c81 100644 --- a/src/mongo/db/query/optimizer/node.h +++ b/src/mongo/db/query/optimizer/node.h @@ -51,47 +51,45 @@ using ProjectionType = ABT; /** * Marker for node class (both logical and physical sub-classes). - * A node not marked with either LogicalNode or PhysicalNode is considered to be both a logical and - * a physical node (e.g. a filter node). It is invalid to mark a node with both tags in the same - * time. + * A node not marked with either ExclusivelyLogicalNode or ExclusivelyPhysicalNode is considered to + * be both a logical and a physical node (e.g. a filter node). It is invalid to mark a node with + * both tags at the same time. */ class Node {}; /** * Marker for exclusively logical nodes. */ -class LogicalNode {}; +class ExclusivelyLogicalNode : public Node {}; /** * Marker for exclusively physical nodes. */ -class PhysicalNode {}; +class ExclusivelyPhysicalNode : public Node {}; inline void assertNodeSort(const ABT& e) { - if (!e.is<Node>()) { - uasserted(6624009, "Node syntax sort expected"); - } + tassert(6624009, "Node syntax sort expected", e.is<Node>()); } template <class T> inline constexpr bool canBeLogicalNode() { // Node which is not exclusively physical. - return std::is_base_of_v<Node, T> && !std::is_base_of_v<PhysicalNode, T>; + return std::is_base_of_v<Node, T> && !std::is_base_of_v<ExclusivelyPhysicalNode, T>; } template <class T> inline constexpr bool canBePhysicalNode() { // Node which is not exclusively logical. - return std::is_base_of_v<Node, T> && !std::is_base_of_v<LogicalNode, T>; + return std::is_base_of_v<Node, T> && !std::is_base_of_v<ExclusivelyLogicalNode, T>; } /** * Logical Scan node. - * It defines scanning a collection with an optional projection name that contains the documents. - * The collection is specified via the scanDefName entry in the metadata. + * Represents scanning from an underlying collection and producing a single projection conceptually + * containing the stream of BSON objects read from the collection. */ -class ScanNode final : public Operator<ScanNode, 1>, public Node, public LogicalNode { - using Base = Operator<ScanNode, 1>; +class ScanNode final : public Operator<1>, public ExclusivelyLogicalNode { + using Base = Operator<1>; public: static constexpr const char* kDefaultCollectionNameSpec = "collectionName"; @@ -102,7 +100,7 @@ public: const ExpressionBinder& binder() const { const ABT& result = get<0>(); - uassert(6624010, "Invalid binder type", result.is<ExpressionBinder>()); + tassert(6624010, "Invalid binder type", result.is<ExpressionBinder>()); return *result.cast<ExpressionBinder>(); } @@ -118,15 +116,12 @@ private: /** * Physical Scan node. * It defines scanning a collection with an optional projection name that contains the documents. - * The collection is specified via the scanDefName entry in the metadata. * * Optionally set of fields is specified to retrieve from the underlying collection, and expose as * projections. */ -class PhysicalScanNode final : public Operator<PhysicalScanNode, 1>, - public Node, - public PhysicalNode { - using Base = Operator<PhysicalScanNode, 1>; +class PhysicalScanNode final : public Operator<1>, public ExclusivelyPhysicalNode { + using Base = Operator<1>; public: PhysicalScanNode(FieldProjectionMap fieldProjectionMap, @@ -137,7 +132,7 @@ public: const ExpressionBinder& binder() const { const ABT& result = get<0>(); - uassert(6624011, "Invalid binder type", result.is<ExpressionBinder>()); + tassert(6624011, "Invalid binder type", result.is<ExpressionBinder>()); return *result.cast<ExpressionBinder>(); } @@ -156,21 +151,26 @@ private: /** * Logical ValueScanNode. * - * It originates a set of projections each with a fixed - * sequence of values, which is encoded as an array. + * It originates a set of projections each with a fixed sequence of values, which is encoded as an + * array. */ -class ValueScanNode final : public Operator<ValueScanNode, 1>, public Node, public LogicalNode { - using Base = Operator<ValueScanNode, 1>; +class ValueScanNode final : public Operator<1>, public ExclusivelyLogicalNode { + using Base = Operator<1>; public: ValueScanNode(ProjectionNameVector projections); + + /** + * Each element of 'valueArray' is an array itself and must have one entry corresponding to + * each of 'projections'. + */ ValueScanNode(ProjectionNameVector projections, ABT valueArray); bool operator==(const ValueScanNode& other) const; const ExpressionBinder& binder() const { const ABT& result = get<0>(); - uassert(6624012, "Invalid binder type", result.is<ExpressionBinder>()); + tassert(6624012, "Invalid binder type", result.is<ExpressionBinder>()); return *result.cast<ExpressionBinder>(); } @@ -185,12 +185,12 @@ private: /** * Physical CoScanNode. * - * Conceptually it originates an infinite stream of Nothing. - * A typical use case is to limit it to one document, and attach projections with a following - * EvaluationNode(s). + * The "Co" in CoScan indicates that it is constant; conceptually it originates an infinite stream + * of Nothing. A typical use case is to limit it to one document, and attach projections with a + * following EvaluationNode(s). */ -class CoScanNode final : public Operator<CoScanNode, 0>, public Node, public PhysicalNode { - using Base = Operator<CoScanNode, 0>; +class CoScanNode final : public Operator<0>, public ExclusivelyPhysicalNode { + using Base = Operator<0>; public: CoScanNode(); @@ -202,11 +202,9 @@ public: * Index scan node. * Retrieve data using an index. Return recordIds or values (if the index is covering). * This is a physical node. - * - * The collection is specified by scanDef, and the index by the indexDef. */ -class IndexScanNode final : public Operator<IndexScanNode, 1>, public Node, public PhysicalNode { - using Base = Operator<IndexScanNode, 1>; +class IndexScanNode final : public Operator<1>, public ExclusivelyPhysicalNode { + using Base = Operator<1>; public: IndexScanNode(FieldProjectionMap fieldProjectionMap, IndexSpecification indexSpec); @@ -215,7 +213,7 @@ public: const ExpressionBinder& binder() const { const ABT& result = get<0>(); - uassert(6624013, "Invalid binder type", result.is<ExpressionBinder>()); + tassert(6624013, "Invalid binder type", result.is<ExpressionBinder>()); return *result.cast<ExpressionBinder>(); } @@ -236,10 +234,11 @@ private: * seek. 'fieldProjectionMap' may choose to include an outgoing rid which will contain the * successive (if we do not have a following limit) document ids. * - * TODO: Can we let it advance with a limit based on upper rid limit in case of primary index? + * TODO SERVER-68936: Can we let it advance with a limit based on upper rid limit in case of primary + * index? */ -class SeekNode final : public Operator<SeekNode, 2>, public Node, public PhysicalNode { - using Base = Operator<SeekNode, 2>; +class SeekNode final : public Operator<2>, public ExclusivelyPhysicalNode { + using Base = Operator<2>; public: SeekNode(ProjectionName ridProjectionName, @@ -250,7 +249,7 @@ public: const ExpressionBinder& binder() const { const ABT& result = get<0>(); - uassert(6624014, "Invalid binder type", result.is<ExpressionBinder>()); + tassert(6624014, "Invalid binder type", result.is<ExpressionBinder>()); return *result.cast<ExpressionBinder>(); } @@ -271,10 +270,8 @@ private: * Logical group delegator node: scan from a given group. * Used in conjunction with memo. */ -class MemoLogicalDelegatorNode final : public Operator<MemoLogicalDelegatorNode, 0>, - public Node, - public LogicalNode { - using Base = Operator<MemoLogicalDelegatorNode, 0>; +class MemoLogicalDelegatorNode final : public Operator<0>, public ExclusivelyLogicalNode { + using Base = Operator<0>; public: MemoLogicalDelegatorNode(GroupIdType groupId); @@ -291,10 +288,8 @@ private: * Physical group delegator node: refer to a physical node in a memo group. * Used in conjunction with memo. */ -class MemoPhysicalDelegatorNode final : public Operator<MemoPhysicalDelegatorNode, 0>, - public Node, - public PhysicalNode { - using Base = Operator<MemoPhysicalDelegatorNode, 0>; +class MemoPhysicalDelegatorNode final : public Operator<0>, public ExclusivelyPhysicalNode { + using Base = Operator<0>; public: MemoPhysicalDelegatorNode(MemoPhysicalNodeId nodeId); @@ -313,8 +308,8 @@ private: * * This node is both logical and physical. */ -class FilterNode final : public Operator<FilterNode, 2>, public Node { - using Base = Operator<FilterNode, 2>; +class FilterNode final : public Operator<2>, public Node { + using Base = Operator<2>; public: FilterNode(FilterType filter, ABT child); @@ -334,8 +329,8 @@ public: * * This node is both logical and physical. */ -class EvaluationNode final : public Operator<EvaluationNode, 2>, public Node { - using Base = Operator<EvaluationNode, 2>; +class EvaluationNode final : public Operator<2>, public Node { + using Base = Operator<2>; public: EvaluationNode(ProjectionName projectionName, ProjectionType projection, ABT child); @@ -344,7 +339,7 @@ public: const ExpressionBinder& binder() const { const ABT& result = get<1>(); - uassert(6624015, "Invalid binder type", result.is<ExpressionBinder>()); + tassert(6624015, "Invalid binder type", result.is<ExpressionBinder>()); return *result.cast<ExpressionBinder>(); } @@ -375,10 +370,8 @@ public: * restrict the type of operations on RIDs (in this case only set intersection) as opposed to say * filter on rid = 5. */ -class RIDIntersectNode final : public Operator<RIDIntersectNode, 2>, - public Node, - public LogicalNode { - using Base = Operator<RIDIntersectNode, 2>; +class RIDIntersectNode final : public Operator<2>, public ExclusivelyLogicalNode { + using Base = Operator<2>; public: RIDIntersectNode(ProjectionName scanProjectionName, @@ -422,10 +415,16 @@ private: * PathGet "a" Traverse Id | scan_0 -> [1, +inf], <none> * PathGet "b" Id | scan_0 -> (-inf, +inf), "pb" */ -class SargableNode final : public Operator<SargableNode, 3>, public Node, public LogicalNode { - using Base = Operator<SargableNode, 3>; +class SargableNode final : public Operator<3>, public ExclusivelyLogicalNode { + using Base = Operator<3>; public: + /** + * Maximum size of the PartialSchemaRequirements that can be used to create a SargableNode. We + * use a 64-bit mask when splitting into left and right requirements. + */ + static constexpr size_t kMaxPartialSchemaRequirements = 64; + SargableNode(PartialSchemaRequirements reqMap, CandidateIndexes candidateIndexes, boost::optional<ScanParams> scanParams, @@ -436,7 +435,7 @@ public: const ExpressionBinder& binder() const { const ABT& result = get<1>(); - uassert(6624016, "Invalid binder type", result.is<ExpressionBinder>()); + tassert(6624016, "Invalid binder type", result.is<ExpressionBinder>()); return *result.cast<ExpressionBinder>(); } @@ -483,8 +482,8 @@ MAKE_PRINTABLE_ENUM_STRING_ARRAY(JoinTypeEnum, JoinType, JOIN_TYPE); * Variables used in the inner (right) side are automatically bound with variables from the left * (outer) side. */ -class BinaryJoinNode final : public Operator<BinaryJoinNode, 3>, public Node { - using Base = Operator<BinaryJoinNode, 3>; +class BinaryJoinNode final : public Operator<3>, public Node { + using Base = Operator<3>; public: BinaryJoinNode(JoinType joinType, @@ -518,12 +517,11 @@ private: /** * Physical hash join node. * Join condition is a conjunction of pairwise equalities between corresponding left and right keys. - * It assumes the outer side is probe side and inner side is "build" side. - * - * TODO: support all join types (not just Inner). + * It assumes the outer side is probe side and inner side is "build" side. Currently supports only + * inner joins. */ -class HashJoinNode final : public Operator<HashJoinNode, 3>, public Node, public PhysicalNode { - using Base = Operator<HashJoinNode, 3>; +class HashJoinNode final : public Operator<3>, public ExclusivelyPhysicalNode { + using Base = Operator<3>; public: HashJoinNode(JoinType joinType, @@ -556,8 +554,8 @@ private: * Merge Join node. * This is a physical node representing joining of two sorted inputs. */ -class MergeJoinNode final : public Operator<MergeJoinNode, 3>, public Node, public PhysicalNode { - using Base = Operator<MergeJoinNode, 3>; +class MergeJoinNode final : public Operator<3>, public ExclusivelyPhysicalNode { + using Base = Operator<3>; public: MergeJoinNode(ProjectionNameVector leftKeys, @@ -594,8 +592,8 @@ private: * * This node is both logical and physical. */ -class UnionNode final : public OperatorDynamic<UnionNode, 2>, public Node { - using Base = OperatorDynamic<UnionNode, 2>; +class UnionNode final : public OperatorDynamic<2>, public Node { + using Base = OperatorDynamic<2>; public: UnionNode(ProjectionNameVector unionProjectionNames, ABTVector children); @@ -604,7 +602,7 @@ public: const ExpressionBinder& binder() const { const ABT& result = get<0>(); - uassert(6624017, "Invalid binder type", result.is<ExpressionBinder>()); + tassert(6624017, "Invalid binder type", result.is<ExpressionBinder>()); return *result.cast<ExpressionBinder>(); } }; @@ -622,11 +620,9 @@ MAKE_PRINTABLE_ENUM_STRING_ARRAY(GroupNodeTypeEnum, GroupNodeType, GROUPNODETYPE * Group-by node. * This node is logical with a default physical implementation corresponding to a hash group-by. * Projects the group-by column from its child, and adds aggregation expressions. - * - * TODO: other physical implementations: stream group-by. */ -class GroupByNode : public Operator<GroupByNode, 5>, public Node { - using Base = Operator<GroupByNode, 5>; +class GroupByNode : public Operator<5>, public Node { + using Base = Operator<5>; public: GroupByNode(ProjectionNameVector groupByProjectionNames, @@ -644,13 +640,13 @@ public: const ExpressionBinder& binderAgg() const { const ABT& result = get<1>(); - uassert(6624018, "Invalid binder type", result.is<ExpressionBinder>()); + tassert(6624018, "Invalid binder type", result.is<ExpressionBinder>()); return *result.cast<ExpressionBinder>(); } const ExpressionBinder& binderGb() const { const ABT& result = get<3>(); - uassert(6624019, "Invalid binder type", result.is<ExpressionBinder>()); + tassert(6624019, "Invalid binder type", result.is<ExpressionBinder>()); return *result.cast<ExpressionBinder>(); } @@ -689,8 +685,8 @@ private: * * This node is both logical and physical. */ -class UnwindNode final : public Operator<UnwindNode, 3>, public Node { - using Base = Operator<UnwindNode, 3>; +class UnwindNode final : public Operator<3>, public Node { + using Base = Operator<3>; public: UnwindNode(ProjectionName projectionName, @@ -702,7 +698,7 @@ public: const ExpressionBinder& binder() const { const ABT& result = get<1>(); - uassert(6624020, "Invalid binder type", result.is<ExpressionBinder>()); + tassert(6624020, "Invalid binder type", result.is<ExpressionBinder>()); return *result.cast<ExpressionBinder>(); } @@ -739,8 +735,8 @@ private: * sequence of given projection names. It is similar to GroupBy using the given projections as a * compound grouping key. */ -class UniqueNode final : public Operator<UniqueNode, 2>, public Node, public PhysicalNode { - using Base = Operator<UniqueNode, 2>; +class UniqueNode final : public Operator<2>, public ExclusivelyPhysicalNode { + using Base = Operator<2>; public: UniqueNode(ProjectionNameVector projections, ABT child); @@ -761,8 +757,8 @@ private: * * It represents an operator to collate (sort, or cluster) the input. */ -class CollationNode final : public Operator<CollationNode, 2>, public Node { - using Base = Operator<CollationNode, 2>; +class CollationNode final : public Operator<2>, public Node { + using Base = Operator<2>; public: CollationNode(properties::CollationRequirement property, ABT child); @@ -786,8 +782,8 @@ private: * * It limits the size of the input by a fixed amount. */ -class LimitSkipNode final : public Operator<LimitSkipNode, 1>, public Node { - using Base = Operator<LimitSkipNode, 1>; +class LimitSkipNode final : public Operator<1>, public Node { + using Base = Operator<1>; public: LimitSkipNode(properties::LimitSkipRequirement property, ABT child); @@ -809,12 +805,11 @@ private: * Exchange node. * It specifies how the relation is spread across machines in the execution environment. * Currently only single-node, and hash-based partitioning are supported. - * TODO: range-based partitioning, replication, and round-robin. * * This node is both logical and physical. */ -class ExchangeNode final : public Operator<ExchangeNode, 2>, public Node { - using Base = Operator<ExchangeNode, 2>; +class ExchangeNode final : public Operator<2>, public Node { + using Base = Operator<2>; public: ExchangeNode(properties::DistributionRequirement distribution, ABT child); @@ -833,7 +828,6 @@ private: /** * Defined for hash and range-based partitioning. - * TODO: other exchange-specific params (e.g. chunk boundaries?) */ const ProjectionName _projectionName; }; @@ -846,8 +840,8 @@ private: * * This node is only logical. */ -class RootNode final : public Operator<RootNode, 2>, public Node { - using Base = Operator<RootNode, 2>; +class RootNode final : public Operator<2>, public Node { + using Base = Operator<2>; public: RootNode(properties::ProjectionRequirement property, ABT child); diff --git a/src/mongo/db/query/optimizer/syntax/expr.h b/src/mongo/db/query/optimizer/syntax/expr.h index 5fb70be4445..cf08cffc004 100644 --- a/src/mongo/db/query/optimizer/syntax/expr.h +++ b/src/mongo/db/query/optimizer/syntax/expr.h @@ -37,7 +37,15 @@ namespace mongo::optimizer { -class Constant final : public Operator<Constant, 0>, public ExpressionSyntaxSort { +/** + * Marker class for expressions. Mutually exclusive with paths and nodes. + */ +class ExpressionSyntaxSort {}; + +/** + * Holds a constant SBE value with corresponding type tag. + */ +class Constant final : public Operator<0>, public ExpressionSyntaxSort { public: Constant(sbe::value::TypeTags tag, sbe::value::Value val); @@ -113,7 +121,12 @@ private: sbe::value::Value _val; }; -class Variable final : public Operator<Variable, 0>, public ExpressionSyntaxSort { +/** + * Represents a reference to a binding. The binding is specified by identifier (string). The logic + * for checking that the reference is valid (e.g., that the referenced binding is in scope) happens + * elsewhere. + */ +class Variable final : public Operator<0>, public ExpressionSyntaxSort { std::string _name; public: @@ -128,12 +141,16 @@ public: } }; -class UnaryOp final : public Operator<UnaryOp, 1>, public ExpressionSyntaxSort { - using Base = Operator<UnaryOp, 1>; +/** + * Models arithmetic and other operations that accept a single argument, for instance negate. + */ +class UnaryOp final : public Operator<1>, public ExpressionSyntaxSort { + using Base = Operator<1>; Operations _op; public: UnaryOp(Operations inOp, ABT inExpr) : Base(std::move(inExpr)), _op(inOp) { + tassert(6684501, "Binary op expected", isUnaryOp(_op)); assertExprSort(getChild()); } @@ -150,13 +167,18 @@ public: } }; -class BinaryOp final : public Operator<BinaryOp, 2>, public ExpressionSyntaxSort { - using Base = Operator<BinaryOp, 2>; +/** + * Models arithmetic, comparison, or logical operations that take two arguments, for instance add or + * subtract. + */ +class BinaryOp final : public Operator<2>, public ExpressionSyntaxSort { + using Base = Operator<2>; Operations _op; public: BinaryOp(Operations inOp, ABT inLhs, ABT inRhs) : Base(std::move(inLhs), std::move(inRhs)), _op(inOp) { + tassert(6684502, "Binary op expected", isBinaryOp(_op)); assertExprSort(getLeftChild()); assertExprSort(getRightChild()); } @@ -179,8 +201,11 @@ public: } }; -class If final : public Operator<If, 3>, public ExpressionSyntaxSort { - using Base = Operator<If, 3>; +/** + * Branching operator with a condition expression, "then" expression, and an "else" expression. + */ +class If final : public Operator<3>, public ExpressionSyntaxSort { + using Base = Operator<3>; public: If(ABT inCond, ABT inThen, ABT inElse) @@ -208,8 +233,12 @@ public: } }; -class Let final : public Operator<Let, 2>, public ExpressionSyntaxSort { - using Base = Operator<Let, 2>; +/** + * Defines a variable from one expression and a specified name which is available to be referenced + * in a second expression. + */ +class Let final : public Operator<2>, public ExpressionSyntaxSort { + using Base = Operator<2>; std::string _varName; @@ -237,8 +266,13 @@ public: } }; -class LambdaAbstraction final : public Operator<LambdaAbstraction, 1>, public ExpressionSyntaxSort { - using Base = Operator<LambdaAbstraction, 1>; +/** + * Represents a single argument lambda. Defines a local variable (representing the argument) which + * can be referenced within the lambda. The variable takes on the value to which LambdaAbstraction + * is applied by its parent. + */ +class LambdaAbstraction final : public Operator<1>, public ExpressionSyntaxSort { + using Base = Operator<1>; std::string _varName; @@ -265,8 +299,12 @@ public: } }; -class LambdaApplication final : public Operator<LambdaApplication, 2>, public ExpressionSyntaxSort { - using Base = Operator<LambdaApplication, 2>; +/** + * Evaluates an expression representing a function over an expression representing the argument to + * the function. + */ +class LambdaApplication final : public Operator<2>, public ExpressionSyntaxSort { + using Base = Operator<2>; public: LambdaApplication(ABT inLambda, ABT inArgument) @@ -288,9 +326,12 @@ public: } }; -class FunctionCall final : public OperatorDynamicHomogenous<FunctionCall>, - public ExpressionSyntaxSort { - using Base = OperatorDynamicHomogenous<FunctionCall>; +/** + * Dynamic arity operator which passes its children as arguments to a function specified by SBE + * function expression name. + */ +class FunctionCall final : public OperatorDynamicHomogenous, public ExpressionSyntaxSort { + using Base = OperatorDynamicHomogenous; std::string _name; public: @@ -310,8 +351,16 @@ public: } }; -class EvalPath final : public Operator<EvalPath, 2>, public ExpressionSyntaxSort { - using Base = Operator<EvalPath, 2>; +/** + * EvalPath defines one context for path behavior used for manipulation and replacement. Some path + * elements have special behavior under this conext. + * + * EvalPath evaluates its path child according to its context over the result of its expression + * child. That is, the expression is evaluated first, and it is used as an input value to the root + * path element, which can then continue the computation recursively. + */ +class EvalPath final : public Operator<2>, public ExpressionSyntaxSort { + using Base = Operator<2>; public: EvalPath(ABT inPath, ABT inInput) : Base(std::move(inPath), std::move(inInput)) { @@ -336,8 +385,15 @@ public: } }; -class EvalFilter final : public Operator<EvalFilter, 2>, public ExpressionSyntaxSort { - using Base = Operator<EvalFilter, 2>; +/** + * EvalFilter defines a context for path behavior used to evaluate boolean conditions for the + * purposes of filtering. Some path elements have special behavior under this context. + * + * EvalFilter evaluates its path child over the result of its expression child. If the result of + * the path application is false, the value is filtered out, otherwise it's returned up the tree. + */ +class EvalFilter final : public Operator<2>, public ExpressionSyntaxSort { + using Base = Operator<2>; public: EvalFilter(ABT inPath, ABT inInput) : Base(std::move(inPath), std::move(inInput)) { @@ -363,9 +419,9 @@ public: }; /** - * This class represents a source of values originating from relational nodes. + * Represents a source of values typically from a collection. */ -class Source final : public Operator<Source, 0>, public ExpressionSyntaxSort { +class Source final : public Operator<0>, public ExpressionSyntaxSort { public: bool operator==(const Source& other) const { return true; diff --git a/src/mongo/db/query/optimizer/syntax/path.h b/src/mongo/db/query/optimizer/syntax/path.h index dd216a0da36..84f219afa19 100644 --- a/src/mongo/db/query/optimizer/syntax/path.h +++ b/src/mongo/db/query/optimizer/syntax/path.h @@ -38,13 +38,18 @@ namespace mongo::optimizer { /** - * A constant path element - any input value is disregarded and replaced by the result of (constant) - * expression. + * Marker class for paths. Mutually exclusive with nodes and expressions. + */ +class PathSyntaxSort {}; + +/** + * A constant path element - any input value is disregarded and replaced by the result of the child + * expression. The child expression does not depend on the values unpacked by the path. * * It could also be expressed as lambda that ignores its input: \ _ . c */ -class PathConstant final : public Operator<PathConstant, 1>, public PathSyntaxSort { - using Base = Operator<PathConstant, 1>; +class PathConstant final : public Operator<1>, public PathSyntaxSort { + using Base = Operator<1>; public: PathConstant(ABT inConstant) : Base(std::move(inConstant)) { @@ -68,8 +73,8 @@ public: * A lambda path element - the expression must be a single argument lambda. The lambda is applied * with the input value. */ -class PathLambda final : public Operator<PathLambda, 1>, public PathSyntaxSort { - using Base = Operator<PathLambda, 1>; +class PathLambda final : public Operator<1>, public PathSyntaxSort { + using Base = Operator<1>; public: PathLambda(ABT inLambda) : Base(std::move(inLambda)) { @@ -86,11 +91,11 @@ public: }; /** - * An identity path element - the input is not disturbed at all. + * An identity path element. Returns the input undisturbed. It can be expressed as lambda : \ x . x. * - * It could also be expressed as lambda : \ x . x + * Not permitted under EvalFilter. */ -class PathIdentity final : public Operator<PathIdentity, 0>, public PathSyntaxSort { +class PathIdentity final : public Operator<0>, public PathSyntaxSort { public: bool operator==(const PathIdentity& other) const { return true; @@ -98,11 +103,14 @@ public: }; /** - * A default path element - If input is Nothing then return the result of expression (assumed to - * return non-Nothing) otherwise return the input undisturbed. + * A default path element - combines an existence check with a replacement step. + * Under EvalPath: If input is Nothing then return the result of the child expression, otherwise + * return the input undisturbed. + * Under EvalFilter: If input is Nothing then return the result of the child expression, otherwise + * return the child expression negated. */ -class PathDefault final : public Operator<PathDefault, 1>, public PathSyntaxSort { - using Base = Operator<PathDefault, 1>; +class PathDefault final : public Operator<1>, public PathSyntaxSort { + using Base = Operator<1>; public: PathDefault(ABT inDefault) : Base(std::move(inDefault)) { @@ -119,17 +127,20 @@ public: }; /** - * A comparison path element - the input value is compared to the result of (constant) expression. - * The actual semantics (return value) depends on what component is evaluating the paths (i.e. - * filter or project). + * A comparison path element - compares the input value to the result of the child expression using + * a comparison operator, and returns a boolean indicating the result of the comparison. The child + * expression does not depend on the values unpacked by the path. + * + * Not permitted under EvalPath. */ -class PathCompare : public Operator<PathCompare, 1>, public PathSyntaxSort { - using Base = Operator<PathCompare, 1>; +class PathCompare : public Operator<1>, public PathSyntaxSort { + using Base = Operator<1>; Operations _cmp; public: PathCompare(Operations inCmp, ABT inVal) : Base(std::move(inVal)), _cmp(inCmp) { + tassert(6684500, "Comparison op expected", isComparisonOp(_cmp)); assertExprSort(getVal()); } @@ -151,10 +162,12 @@ public: }; /** - * A drop path element - drops fields from the input if it is an object otherwise returns it - * undisturbed. + * A drop path element - If the input is an object, drops the specified fields, otherwise returns + * the input unmodified. The fields are treated as simple field paths. + * + * Not permitted under EvalFilter. */ -class PathDrop final : public Operator<PathDrop, 0>, public PathSyntaxSort { +class PathDrop final : public Operator<0>, public PathSyntaxSort { public: using NameSet = std::set<std::string>; @@ -173,10 +186,12 @@ private: }; /** - * A keep path element - keeps fields from the input if it is an object otherwise returns it - * undisturbed. + * A keep path element - If the input is an object, keeps the specified fields, otherwise returns + * the input unmodified. The fields are treated as simple field paths. + * + * Not permitted in EvalFilter. */ -class PathKeep final : public Operator<PathKeep, 0>, public PathSyntaxSort { +class PathKeep final : public Operator<0>, public PathSyntaxSort { public: using NameSet = std::set<std::string>; @@ -195,9 +210,11 @@ private: }; /** - * Returns input undisturbed if it is an object otherwise return Nothing. + * Combines an object type check with a potential replacement. + * Under EvalPath: If input is an object then return it unmodified, otherwise return Nothing. + * Under EvalFilter: If input is an object then return true, otherwise return false. */ -class PathObj final : public Operator<PathObj, 0>, public PathSyntaxSort { +class PathObj final : public Operator<0>, public PathSyntaxSort { public: bool operator==(const PathObj& other) const { return true; @@ -205,9 +222,11 @@ public: }; /** - * Returns input undisturbed if it is an array otherwise return Nothing. + * Combines an array type check with a potential replacement. + * Under EvalPath: If input is an object then return it unmodified, otherwise return Nothing. + * Under EvalFilter: If input is an object then return true, otherwise return false. */ -class PathArr final : public Operator<PathArr, 0>, public PathSyntaxSort { +class PathArr final : public Operator<0>, public PathSyntaxSort { public: bool operator==(const PathArr& other) const { return true; @@ -215,12 +234,17 @@ public: }; /** - * A traverse path element - apply the inner path to every element of an array. + * A traverse path element - if the input is not an array, applies the inner path to the input. + * Otherwise, recursively evaluates on each element of the array. + * + * Under EvalPath: re-assembles the result from each element into an array, and returns the array. + * Under EvalFilter: returns true if the inner path applied to any of the array elements is true. + * * Specifies a maximum depth of the traversal: how many nested arrays are we allowed to descend. "0" - * specifies unlimited depth. + * denotes unlimited depth. */ -class PathTraverse final : public Operator<PathTraverse, 1>, public PathSyntaxSort { - using Base = Operator<PathTraverse, 1>; +class PathTraverse final : public Operator<1>, public PathSyntaxSort { + using Base = Operator<1>; public: static constexpr size_t kUnlimited = 0; @@ -229,8 +253,9 @@ public: PathTraverse(ABT inPath, const size_t maxDepth) : Base(std::move(inPath)), _maxDepth(maxDepth) { assertPathSort(getPath()); - uassert(6743600, - "For now only 0 and 1 is supported for maxDepth", + // TODO SERVER-67306: Support different maxDepth values. + tassert(6743600, + "maxDepth must be either 0 or 1", maxDepth == kUnlimited || maxDepth == kSingleLevel); } @@ -255,10 +280,16 @@ private: }; /** - * A field path element - apply the inner path to an object field. + * A field path element - models the act of setting a field in an object. Extracts the specified + * field from the input and runs the inner path with that value following "get" semantics. Then, + * 1. If its input is an object: sets the field in the input to the result. + * 2. If the input is not an object: returns the input unmodified if the inner path returned + * Nothing, otherwise returns an object with the single field and the result as its value. + * + * Not permitted in EvalFilter. */ -class PathField final : public Operator<PathField, 1>, public PathSyntaxSort { - using Base = Operator<PathField, 1>; +class PathField final : public Operator<1>, public PathSyntaxSort { + using Base = Operator<1>; std::string _name; public: @@ -284,10 +315,14 @@ public: }; /** - * A get path element - similar to the path element. + * A get path element. If the input is an object and the specified field exists in it, gets the + * value for the field, and returns the result of the inner path applied to the value. Otherwise, + * returns the result of the inner path applied to Nothing. + * + * The specified field name is treated as a simple path. */ -class PathGet final : public Operator<PathGet, 1>, public PathSyntaxSort { - using Base = Operator<PathGet, 1>; +class PathGet final : public Operator<1>, public PathSyntaxSort { + using Base = Operator<1>; std::string _name; public: @@ -314,9 +349,13 @@ public: /** * A multiplicative composition path element. + * Under EvalPath: evaluates the first inner path evaluated over the input, and then evaluates the + * second inner path over that result. + * Under EvalFilter: evaluates both inner paths over the input. Returns true if both inner paths + * return true. */ -class PathComposeM final : public Operator<PathComposeM, 2>, public PathSyntaxSort { - using Base = Operator<PathComposeM, 2>; +class PathComposeM final : public Operator<2>, public PathSyntaxSort { + using Base = Operator<2>; public: PathComposeM(ABT inPath1, ABT inPath2) : Base(std::move(inPath1), std::move(inPath2)) { @@ -338,10 +377,13 @@ public: }; /** - * An additive composition path element. + * An additive composition path element. Runs the inner paths with the input and returns true if + * either inner path returns true. + * + * Not permitted within EvalPath. */ -class PathComposeA final : public Operator<PathComposeA, 2>, public PathSyntaxSort { - using Base = Operator<PathComposeA, 2>; +class PathComposeA final : public Operator<2>, public PathSyntaxSort { + using Base = Operator<2>; public: PathComposeA(ABT inPath1, ABT inPath2) : Base(std::move(inPath1), std::move(inPath2)) { diff --git a/src/mongo/db/query/optimizer/syntax/syntax.h b/src/mongo/db/query/optimizer/syntax/syntax.h index 0abfb54a0f1..d44d32fbc00 100644 --- a/src/mongo/db/query/optimizer/syntax/syntax.h +++ b/src/mongo/db/query/optimizer/syntax/syntax.h @@ -92,14 +92,13 @@ using ABT = algebra::PolyValue<Blackhole, References, // utilities ExpressionBinder>; -template <typename Derived, size_t Arity> +template <size_t Arity> using Operator = algebra::OpSpecificArity<ABT, Arity>; -template <typename Derived, size_t Arity> +template <size_t Arity> using OperatorDynamic = algebra::OpSpecificDynamicArity<ABT, Arity>; -template <typename Derived> -using OperatorDynamicHomogenous = OperatorDynamic<Derived, 0>; +using OperatorDynamicHomogenous = OperatorDynamic<0>; using ABTVector = std::vector<ABT>; @@ -115,20 +114,12 @@ inline auto makeSeq(Args&&... args) { return seq; } -class ExpressionSyntaxSort {}; - -class PathSyntaxSort {}; - inline void assertExprSort(const ABT& e) { - if (!e.is<ExpressionSyntaxSort>()) { - uasserted(6624058, "expression syntax sort expected"); - } + tassert(6624058, "expression syntax sort expected", e.is<ExpressionSyntaxSort>()); } inline void assertPathSort(const ABT& e) { - if (!e.is<PathSyntaxSort>()) { - uasserted(6624059, "path syntax sort expected"); - } + tassert(6624059, "path syntax sort expected", e.is<PathSyntaxSort>()); } inline bool operator!=(const ABT& left, const ABT& right) { @@ -172,6 +163,22 @@ inline constexpr bool isBinaryOp(Operations op) { return !isUnaryOp(op); } +inline constexpr bool isComparisonOp(Operations op) { + switch (op) { + case Operations::Eq: + case Operations::EqMember: + case Operations::Neq: + case Operations::Gt: + case Operations::Gte: + case Operations::Lt: + case Operations::Lte: + case Operations::Cmp3w: + return true; + default: + return false; + } +} + inline constexpr Operations reverseComparisonOp(Operations op) { switch (op) { case Operations::Eq: @@ -196,7 +203,7 @@ inline constexpr Operations reverseComparisonOp(Operations op) { * This is a special inert ABT node. It is used by rewriters to preserve structural properties of * nodes during in-place rewriting. */ -class Blackhole final : public Operator<Blackhole, 0> { +class Blackhole final : public Operator<0> { public: bool operator==(const Blackhole& other) const { return true; @@ -214,11 +221,11 @@ public: * the optimizer developers. * On the other hand using Variables everywhere makes writing code more verbose, hence this helper. */ -class References final : public OperatorDynamicHomogenous<References> { - using Base = OperatorDynamicHomogenous<References>; +class References final : public OperatorDynamicHomogenous { + using Base = OperatorDynamicHomogenous; public: - /* + /** * Construct Variable objects out of provided vector of strings. */ References(const std::vector<std::string>& names) : Base(ABTVector{}) { @@ -228,7 +235,7 @@ public: } } - /* + /** * Alternatively, construct references out of provided ABTs. This may be useful when the * internal references are more complex then a simple string. We may consider e.g. GROUP BY * (a+b). @@ -245,12 +252,12 @@ public: }; /** - * This class represents a unified way of binding identifiers to expressions. Every ABT node that - * introduces a new identifier must use this binder (i.e. all relational nodes adding new - * projections and expression nodes adding new local variables). + * This class represents a unified way of binding identifiers (strings) to expressions. Every ABT + * node that introduces a new identifier must use this binder (i.e. all relational nodes adding new + * projections). */ -class ExpressionBinder : public OperatorDynamicHomogenous<ExpressionBinder> { - using Base = OperatorDynamicHomogenous<ExpressionBinder>; +class ExpressionBinder : public OperatorDynamicHomogenous { + using Base = OperatorDynamicHomogenous; std::vector<std::string> _names; public: diff --git a/src/mongo/db/query/optimizer/syntax/syntax_fwd_declare.h b/src/mongo/db/query/optimizer/syntax/syntax_fwd_declare.h index b21781959e4..3662a7b6ca6 100644 --- a/src/mongo/db/query/optimizer/syntax/syntax_fwd_declare.h +++ b/src/mongo/db/query/optimizer/syntax/syntax_fwd_declare.h @@ -98,4 +98,7 @@ class RootNode; */ class References; class ExpressionBinder; + +class PathSyntaxSort; +class ExpressionSyntaxSort; } // namespace mongo::optimizer |