diff options
author | Drew Paroski <drew.paroski@mongodb.com> | 2022-12-19 20:19:32 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-12-20 01:21:18 +0000 |
commit | b0d507fb7c7f255c12b58736f6ceb0675ed75607 (patch) | |
tree | 41efce50da4b42068023e9b9503feccb0fba3203 /src/mongo | |
parent | 61f4394d50b28e43267f335e1acf1360cb041efd (diff) | |
download | mongo-b0d507fb7c7f255c12b58736f6ceb0675ed75607.tar.gz |
SERVER-69876 De-stage-ify the SBE filter stage builder
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 96 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_coll_scan.cpp | 32 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_eval_frame.h | 114 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_filter.cpp | 1475 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_filter.h | 43 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_helpers.cpp | 233 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_helpers.h | 336 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_index_scan.cpp | 52 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_projection.cpp | 3 |
9 files changed, 561 insertions, 1823 deletions
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index ccba7dfce4f..8ff023b0d76 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -596,39 +596,41 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder namespace { std::unique_ptr<sbe::EExpression> generatePerColumnPredicate(StageBuilderState& state, const MatchExpression* me, - const sbe::EVariable& inputVar) { + EvalExpr expr) { switch (me->matchType()) { // These are always safe since they will never match documents missing their field, or where // the element is an object or array. case MatchExpression::REGEX: - return generateRegexExpr(state, checked_cast<const RegexMatchExpression*>(me), inputVar) + return generateRegexExpr( + state, checked_cast<const RegexMatchExpression*>(me), std::move(expr)) .extractExpr(state.slotVarMap); case MatchExpression::MOD: - return generateModExpr(state, checked_cast<const ModMatchExpression*>(me), inputVar) + return generateModExpr( + state, checked_cast<const ModMatchExpression*>(me), std::move(expr)) .extractExpr(state.slotVarMap); case MatchExpression::BITS_ALL_SET: return generateBitTestExpr(state, checked_cast<const BitTestMatchExpression*>(me), sbe::BitTestBehavior::AllSet, - inputVar) + std::move(expr)) .extractExpr(state.slotVarMap); case MatchExpression::BITS_ALL_CLEAR: return generateBitTestExpr(state, checked_cast<const BitTestMatchExpression*>(me), sbe::BitTestBehavior::AllClear, - inputVar) + std::move(expr)) .extractExpr(state.slotVarMap); case MatchExpression::BITS_ANY_SET: return generateBitTestExpr(state, checked_cast<const BitTestMatchExpression*>(me), sbe::BitTestBehavior::AnySet, - inputVar) + std::move(expr)) .extractExpr(state.slotVarMap); case MatchExpression::BITS_ANY_CLEAR: return generateBitTestExpr(state, checked_cast<const BitTestMatchExpression*>(me), sbe::BitTestBehavior::AnyClear, - inputVar) + std::move(expr)) .extractExpr(state.slotVarMap); case MatchExpression::EXISTS: return makeConstant(sbe::value::TypeTags::Boolean, true); @@ -636,46 +638,46 @@ std::unique_ptr<sbe::EExpression> generatePerColumnPredicate(StageBuilderState& return generateComparisonExpr(state, checked_cast<const ComparisonMatchExpression*>(me), sbe::EPrimBinary::less, - inputVar) + std::move(expr)) .extractExpr(state.slotVarMap); case MatchExpression::GT: return generateComparisonExpr(state, checked_cast<const ComparisonMatchExpression*>(me), sbe::EPrimBinary::greater, - inputVar) + std::move(expr)) .extractExpr(state.slotVarMap); case MatchExpression::EQ: return generateComparisonExpr(state, checked_cast<const ComparisonMatchExpression*>(me), sbe::EPrimBinary::eq, - inputVar) + std::move(expr)) .extractExpr(state.slotVarMap); case MatchExpression::LTE: return generateComparisonExpr(state, checked_cast<const ComparisonMatchExpression*>(me), sbe::EPrimBinary::lessEq, - inputVar) + std::move(expr)) .extractExpr(state.slotVarMap); case MatchExpression::GTE: return generateComparisonExpr(state, checked_cast<const ComparisonMatchExpression*>(me), sbe::EPrimBinary::greaterEq, - inputVar) + std::move(expr)) .extractExpr(state.slotVarMap); case MatchExpression::MATCH_IN: { - auto expr = checked_cast<const InMatchExpression*>(me); + const auto* ime = checked_cast<const InMatchExpression*>(me); tassert(6988583, "Push-down of non-scalar values in $in is not supported.", - !expr->hasNonScalarOrNonEmptyValues()); - return generateInExpr(state, expr, inputVar).extractExpr(state.slotVarMap); + !ime->hasNonScalarOrNonEmptyValues()); + return generateInExpr(state, ime, std::move(expr)).extractExpr(state.slotVarMap); } case MatchExpression::TYPE_OPERATOR: { - const auto& expr = checked_cast<const TypeMatchExpression*>(me); - const MatcherTypeSet& ts = expr->typeSet(); + const auto* tme = checked_cast<const TypeMatchExpression*>(me); + const MatcherTypeSet& ts = tme->typeSet(); return makeFunction( "typeMatch", - inputVar.clone(), + expr.extractExpr(state.slotVarMap), makeConstant(sbe::value::TypeTags::NumberInt64, sbe::value::bitcastFrom<int64_t>(ts.getBSONTypeMask()))); } @@ -691,10 +693,10 @@ std::unique_ptr<sbe::EExpression> generateLeafExpr(StageBuilderState& state, const MatchExpression* me, sbe::FrameId lambdaFrameId, sbe::value::SlotId inputSlot) { - sbe::EVariable lambdaParam = sbe::EVariable{lambdaFrameId, 0}; + auto lambdaParam = makeVariable(lambdaFrameId, 0); auto lambdaExpr = sbe::makeE<sbe::ELocalLambda>( - lambdaFrameId, generatePerColumnPredicate(state, me, lambdaParam)); + lambdaFrameId, generatePerColumnPredicate(state, me, std::move(lambdaParam))); const MatchExpression::MatchType mt = me->matchType(); auto traverserName = (mt == MatchExpression::EXISTS || mt == MatchExpression::TYPE_OPERATOR) @@ -847,18 +849,13 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder // Generate post assembly filter. if (csn->postAssemblyFilter) { - auto relevantSlots = sbe::makeSV(reconstructedRecordSlot); - if (ridSlot) { - relevantSlots.push_back(*ridSlot); - } + auto filterExpr = generateFilter( + _state, csn->postAssemblyFilter.get(), reconstructedRecordSlot, &outputs); - auto [_, outputStage] = generateFilter(_state, - csn->postAssemblyFilter.get(), - {std::move(stage), std::move(relevantSlots)}, - reconstructedRecordSlot, - &outputs, - csn->nodeId()); - stage = outputStage.extractStage(csn->nodeId()); + if (!filterExpr.isNull()) { + stage = sbe::makeS<sbe::FilterStage<false>>( + std::move(stage), filterExpr.extractExpr(_state.slotVarMap), csn->nodeId()); + } } return {std::move(stage), std::move(outputs)}; @@ -960,21 +957,16 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder } for (size_t i = 0; i < topLevelFields.size(); ++i) { - outputs.set(std::make_pair(PlanStageSlots::kField, topLevelFields[i]), + outputs.set(std::make_pair(PlanStageSlots::kField, std::move(topLevelFields[i])), topLevelFieldSlots[i]); } if (fn->filter) { - auto forwardingReqs = reqs.copy().set(kResult).setFields(std::move(topLevelFields)); - auto relevantSlots = getSlotsToForward(forwardingReqs, outputs); - - auto [_, outputStage] = generateFilter(_state, - fn->filter.get(), - {std::move(stage), std::move(relevantSlots)}, - resultSlot, - &outputs, - root->nodeId()); - stage = outputStage.extractStage(root->nodeId()); + auto filterExpr = generateFilter(_state, fn->filter.get(), resultSlot, &outputs); + if (!filterExpr.isNull()) { + stage = sbe::makeS<sbe::FilterStage<false>>( + std::move(stage), filterExpr.extractExpr(_state.slotVarMap), root->nodeId()); + } } auto fieldsSet = StringSet{fields.begin(), fields.end()}; @@ -1721,8 +1713,6 @@ SlotBasedStageBuilder::buildProjectionDefault(const QuerySolutionNode* root, auto [stage, outputs] = build(pn->children[0].get(), childReqs); - auto relevantSlots = getSlotsToForward(childReqs, outputs); - auto projectionExpr = generateProjection(_state, &projection, outputs.get(kResult), &outputs); auto [resultSlot, resultStage] = projectEvalExpr(std::move(projectionExpr), EvalStage{std::move(stage), {}}, @@ -1821,7 +1811,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder } } - childReqs.setFields(fields); + childReqs.setFields(std::move(fields)); sbe::PlanStage::Vector inputStages; std::vector<sbe::value::SlotVector> inputSlots; @@ -1849,16 +1839,12 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder } if (orn->filter) { - auto forwardingReqs = reqs.copy().set(kResult).setFields(std::move(fields)); - auto relevantSlots = getSlotsToForward(forwardingReqs, outputs); - - auto [_, outputStage] = generateFilter(_state, - orn->filter.get(), - {std::move(stage), std::move(relevantSlots)}, - outputs.get(kResult), - &outputs, - root->nodeId()); - stage = outputStage.extractStage(root->nodeId()); + auto resultSlot = outputs.get(kResult); + auto filterExpr = generateFilter(_state, orn->filter.get(), resultSlot, &outputs); + if (!filterExpr.isNull()) { + stage = sbe::makeS<sbe::FilterStage<false>>( + std::move(stage), filterExpr.extractExpr(_state.slotVarMap), root->nodeId()); + } } outputs.clearNonRequiredSlots(reqs); diff --git a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp index a7958c96cbe..da2867b1ef5 100644 --- a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp +++ b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp @@ -473,21 +473,12 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo invariant(!csn->stopApplyingFilterAfterFirstMatch || csn->filter); if (csn->filter) { - auto relevantSlots = sbe::makeSV(resultSlot, recordIdSlot); - if (tsSlot) { - relevantSlots.push_back(*tsSlot); + auto filterExpr = generateFilter(state, csn->filter.get(), resultSlot, nullptr); + if (!filterExpr.isNull()) { + stage = sbe::makeS<sbe::FilterStage<false>>( + std::move(stage), filterExpr.extractExpr(state.slotVarMap), csn->nodeId()); } - relevantSlots.insert(relevantSlots.end(), fieldSlots.begin(), fieldSlots.end()); - - auto [_, outputStage] = generateFilter(state, - csn->filter.get(), - {std::move(stage), std::move(relevantSlots)}, - resultSlot, - nullptr /* planStageSlots */, - csn->nodeId()); - stage = outputStage.extractStage(csn->nodeId()); - // We may be requested to stop applying the filter after the first match. This can happen // if the query is just a lower bound on 'ts' on a forward scan. In this case every document // in the collection after the first matching one must also match, so there is no need to @@ -658,16 +649,11 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateGenericCollSc // 'generateOptimizedOplogScan()'. invariant(!csn->stopApplyingFilterAfterFirstMatch); - auto relevantSlots = sbe::makeSV(resultSlot, recordIdSlot); - relevantSlots.insert(relevantSlots.end(), fieldSlots.begin(), fieldSlots.end()); - - auto [_, outputStage] = generateFilter(state, - csn->filter.get(), - {std::move(stage), std::move(relevantSlots)}, - resultSlot, - &outputs, - csn->nodeId()); - stage = outputStage.extractStage(csn->nodeId()); + auto filterExpr = generateFilter(state, csn->filter.get(), resultSlot, &outputs); + if (!filterExpr.isNull()) { + stage = sbe::makeS<sbe::FilterStage<false>>( + std::move(stage), filterExpr.extractExpr(state.slotVarMap), csn->nodeId()); + } } return {std::move(stage), std::move(outputs)}; diff --git a/src/mongo/db/query/sbe_stage_builder_eval_frame.h b/src/mongo/db/query/sbe_stage_builder_eval_frame.h index 592f563ee3b..0a19bdd8807 100644 --- a/src/mongo/db/query/sbe_stage_builder_eval_frame.h +++ b/src/mongo/db/query/sbe_stage_builder_eval_frame.h @@ -138,6 +138,10 @@ public: _storage = false; } + std::unique_ptr<sbe::EExpression> getExpr(optimizer::SlotVarMap& varMap) const { + return clone().extractExpr(varMap); + } + /** * Extract the expression on top of the stack as an SBE EExpression node. If the expression is * stored as an ABT node, it is lowered into an SBE expression, using the provided map to @@ -226,116 +230,6 @@ private: sbe::value::SlotVector _outSlots; }; -/** - * To support non-leaf operators in general, SBE builders maintain a stack of EvalFrames. An - * EvalFrame holds a subtree to build on top of (stage), a stack of expressions (exprs) and extra - * data useful for particular builder (data). - * Initially there is only one EvalFrame on the stack which holds the main tree. Non-leaf operators - * can decide to push an EvalFrame on the stack before each of their children is evaluated if - * desired. If a non-leaf operator pushes one or more EvalFrames onto the stack, it is responsible - * for removing these EvalFrames from the stack later. - */ -template <typename T> -class EvalFrame { -public: - template <typename... Args> - EvalFrame(EvalStage stage, Args&&... args) - : _data{std::forward<Args>(args)...}, _stage(std::move(stage)) {} - - const EvalExpr& topExpr() const { - invariant(!_exprs.empty()); - return _exprs.top(); - } - - void pushExpr(EvalExpr expr) { - _exprs.push(std::move(expr)); - } - - EvalExpr popExpr() { - invariant(!_exprs.empty()); - auto expr = std::move(_exprs.top()); - _exprs.pop(); - return expr; - } - - size_t exprsCount() const { - return _exprs.size(); - } - - const T& data() const { - return _data; - } - - T& data() { - return _data; - } - - void setStage(EvalStage stage) { - _stage = std::move(stage); - } - - const EvalStage& getStage() const { - return _stage; - } - - EvalStage extractStage() { - return std::move(_stage); - } - -private: - T _data; - EvalStage _stage; - std::stack<EvalExpr> _exprs; -}; - -/** - * Empty struct for 'data' field in case builder does not need to carry any additional data with - * each frame. - */ -struct NoExtraFrameData {}; - using EvalExprStagePair = std::pair<EvalExpr, EvalStage>; -template <typename Data = NoExtraFrameData> -class EvalStack { -public: - using Frame = EvalFrame<Data>; - - EvalStack() = default; - - template <typename... Args> - void emplaceFrame(Args&&... args) { - stack.emplace(std::forward<Args>(args)...); - } - - Frame& topFrame() { - invariant(!stack.empty()); - return stack.top(); - } - - const Frame& topFrame() const { - invariant(!stack.empty()); - return stack.top(); - } - - EvalExprStagePair popFrame() { - invariant(framesCount() > 0); - auto& frame = topFrame(); - - invariant(frame.exprsCount() == 1); - auto expr = frame.popExpr(); - auto stage = frame.extractStage(); - - stack.pop(); - return {std::move(expr), std::move(stage)}; - } - - size_t framesCount() const { - return stack.size(); - } - -private: - std::stack<Frame> stack; -}; - } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder_filter.cpp b/src/mongo/db/query/sbe_stage_builder_filter.cpp index 396c3d5b903..63adfcbcb8b 100644 --- a/src/mongo/db/query/sbe_stage_builder_filter.cpp +++ b/src/mongo/db/query/sbe_stage_builder_filter.cpp @@ -81,163 +81,125 @@ namespace mongo::stage_builder { namespace { struct MatchExpressionVisitorContext; -const size_t kMaxChildrenForTopLevelAndOptimization = 25; /** - * Output of the tree can come from two places: - * - If there is an expression on the evaluation stack in the end of tree construction, then this - * is the output for the whole tree. This is checked in the 'MatchExpressionVisitorContext::done' - * method. - * - If we apply top-level AND optimization, then in the end of tree construction the evaluation - * stack will be empty. This happens because expressions which normally would reside on the stack - * are popped and inserted directly into the filter stage for each branch. - * - * So, we need to record output in both the 'MatchExpressionVisitorContext::done' method and builder - * for top-level AND. - * - * This function takes the current expression, projects it into a separate slot and stores this slot - * as an output for the current frame. + * A function of type 'MakePredicateFn' can be called to generate an EExpression which applies + * a predicate to the value found in 'inputExpr'. */ -void projectCurrentExprToOutputSlot(MatchExpressionVisitorContext* context); +using MakePredicateFn = std::function<std::unique_ptr<sbe::EExpression>(EvalExpr inputExpr)>; /** - * The various flavors of PathMatchExpressions require the same skeleton of traverseF()/lambdas or - * TraverseStage in order to perform path traversal. - * - * A function of type 'MakePredicateExprFn' can be called to generate an EExpression which applies - * a predicate to the value found in 'var'. - * - * A function of type 'MakePredicateFn' can be called to generate an EvalExprStagePair which applies - * a predicate to the value found in 'slot'. Newly generated stages (if any) will be built on top of - * 'inputStage'. + * A struct for storing context across calls to visit() methods in MatchExpressionPreVisitor, + * MatchExpressionInVisitor, and MatchExpressionPostVisitor. */ -using MakePredicateExprFn = - std::function<std::unique_ptr<sbe::EExpression>(const sbe::EVariable& var)>; +struct MatchExpressionVisitorContext { + struct MatchFrame { + /** + * MatchFrame's constructor has 3 parameters. 'inputExpr' provides the input source, and is + * expected to be a local variable or a slot. 'frameId' is the FrameId of the current lambda + * (or boost::none if there is no current lambda). By default, 'childOfElemMatchValue' is + * false and generatePredicate() will generate a traversal for the current MatchExpression's + * field path (using 'inputExpr' as the base of the traversal) when applying the predicate. + * When 'childOfElemMatchValue' is set to true, generatePredicate() will ignore the current + * MatchExpression's field path and just apply the predicate directly on 'inputExpr'. + */ + MatchFrame(EvalExpr inputExpr, + boost::optional<sbe::FrameId> frameId = boost::none, + bool childOfElemMatchValue = false) + : inputExpr(std::move(inputExpr)), + frameId(frameId), + childOfElemMatchValue(childOfElemMatchValue) {} + + void pushExpr(EvalExpr expr) { + exprStack.push_back(std::move(expr)); + } -using MakePredicateFn = - std::function<EvalExprStagePair(sbe::value::SlotId inputSlot, EvalStage inputStage)>; + EvalExpr popEvalExpr() { + tassert(6987609, "Expected 'exprStack' to be non-empty", !exprStack.empty()); + auto expr = std::move(exprStack.back()); + exprStack.pop_back(); + return expr; + } + + std::unique_ptr<sbe::EExpression> popExpr(optimizer::SlotVarMap& slotVarMap) { + return popEvalExpr().extractExpr(slotVarMap); + } + + size_t exprsCount() const { + return exprStack.size(); + } + + EvalExpr inputExpr; + boost::optional<sbe::FrameId> frameId; + bool childOfElemMatchValue = false; + std::vector<EvalExpr> exprStack; + }; -/** - * A struct for storing context across calls to visit() methods in MatchExpressionVisitor's. - */ -struct MatchExpressionVisitorContext { MatchExpressionVisitorContext(StageBuilderState& state, - EvalStage inputStage, - boost::optional<sbe::value::SlotId> inputSlot, + EvalExpr inputExprParam, const MatchExpression* root, - PlanNodeId planNodeId, const PlanStageSlots* slots, - bool isFilterOverIxscan, - const FilterStateHelper& stateHelper) + bool isFilterOverIxscan) : state{state}, - inputSlot{inputSlot}, + inputExpr{inputExprParam.clone()}, slots{slots}, - isFilterOverIxscan{isFilterOverIxscan}, - topLevelAnd{nullptr}, - planNodeId{planNodeId}, - stateHelper{stateHelper} { + isFilterOverIxscan{isFilterOverIxscan} { tassert(7097201, - "Expected 'inputSlot' or 'slots' to be defined", - inputSlot.has_value() || slots != nullptr); - - // Set up the top-level EvalFrame. - evalStack.emplaceFrame(std::move(inputStage), inputSlot); + "Expected 'inputExpr' or 'slots' to be defined", + !inputExpr.isNull() || slots != nullptr); - // If the root node is an $and, store it in 'topLevelAnd'. - // TODO: SERVER-50673: Revisit how we implement the top-level $and optimization. - if (root->matchType() == MatchExpression::AND && - root->numChildren() <= kMaxChildrenForTopLevelAndOptimization) { - topLevelAnd = root; - } + // Set up the top-level MatchFrame. + emplaceFrame(std::move(inputExprParam)); } - std::pair<boost::optional<sbe::value::SlotId>, EvalStage> done() { - invariant(evalStack.framesCount() == 1); - auto& frame = evalStack.topFrame(); + EvalExpr done() { + invariant(framesCount() == 1); + auto& frame = topFrame(); if (frame.exprsCount() > 0) { - if (stateHelper.stateContainsValue()) { - projectCurrentExprToOutputSlot(this); - } invariant(frame.exprsCount() == 1); - frame.setStage(makeFilter<false>( - frame.extractStage(), - stateHelper.getBool(frame.popExpr().extractExpr(state.slotVarMap)), - planNodeId)); + return frame.popEvalExpr(); } - if (outputSlot && stateHelper.stateContainsValue()) { - // In case 'outputSlot' is defined and state contains a value, we need to extract this - // value into a separate slot and return it. The resulting value depends on the state - // type, see the implementation of specific state helper for details. - return stateHelper.projectValueCombinator(*outputSlot, - frame.extractStage(), - planNodeId, - state.slotIdGenerator, - state.frameIdGenerator); - } + return EvalExpr{}; + } - return {boost::none, frame.extractStage()}; + template <typename... Args> + void emplaceFrame(Args&&... args) { + matchStack.emplace_back(std::forward<Args>(args)...); } - struct FrameData { - FrameData(boost::optional<sbe::value::SlotId> inputSlot, bool childOfElemMatchValue = false) - : inputSlot(inputSlot), childOfElemMatchValue(childOfElemMatchValue) {} + MatchFrame& topFrame() { + tassert(6987600, "Expected matchStack to be non-empty", !matchStack.empty()); + return matchStack.back(); + } - // For an index filter we don't build a traversal sub-tree, and do not use complex - // expressions, such as $elemMatch or nested logical $and/$or/$nor. As such, we don't need - // to create nested EvalFrames, and we don't need an 'inputSlot' for the frame, because - // values are read from the 'slots' map stored in the context. Yet, we still need a top- - // level EvalFrame, as the the entire filter generator logic is based on the assumption - // that we've got at least one EvalFrame. Hence, the 'inputSlot' is declared optional. - boost::optional<sbe::value::SlotId> inputSlot; + const MatchFrame& topFrame() const { + tassert(6987601, "Expected matchStack to be non-empty", !matchStack.empty()); + return matchStack.back(); + } - bool childOfElemMatchValue = false; - }; + void popFrame() { + tassert(6987602, "Expected frame's exprStack to be empty", topFrame().exprsCount() == 0); + matchStack.pop_back(); + } - StageBuilderState& state; + size_t framesCount() const { + return matchStack.size(); + } - EvalStack<FrameData> evalStack; + StageBuilderState& state; + std::vector<MatchFrame> matchStack; - // The current context must be initialized either with a slot containing the entire document - // ('inputSlot') or with set of kField slots ('slots'). - boost::optional<sbe::value::SlotId> inputSlot; + // The current context must be initialized either with an EvalExpr that produces the root + // document ('inputExpr') or with the set of kField slots ('slots'). + EvalExpr inputExpr; const PlanStageSlots* slots = nullptr; - bool isFilterOverIxscan = false; - - const MatchExpression* topLevelAnd; - - // The id of the 'QuerySolutionNode' which houses the match expression that we are converting to - // SBE. - const PlanNodeId planNodeId; - - // Helper for managing the internal state of the filter tree. See 'FilterStateHelper' definition - // for details. - const FilterStateHelper& stateHelper; - - // Trees for some queries can have something to output. For instance, if we use - // 'IndexStateHelper' for managing internal state, this output is the index of the array element - // that matched our query predicate. This field stores the slot id containing the output of the - // tree. - boost::optional<sbe::value::SlotId> outputSlot; }; -void projectCurrentExprToOutputSlot(MatchExpressionVisitorContext* context) { - tassert(5291405, "Output slot is not empty", !context->outputSlot); - auto& frame = context->evalStack.topFrame(); - auto [projectedExprSlot, stage] = projectEvalExpr(frame.popExpr(), - frame.extractStage(), - context->planNodeId, - context->state.slotIdGenerator, - context->state.slotVarMap); - context->outputSlot = projectedExprSlot; - frame.pushExpr(projectedExprSlot); - frame.setStage(std::move(stage)); -} - enum class LeafTraversalMode { - // Don't generate a TraverseStage for the leaf. + // Don't traverse the leaf. kDoNotTraverseLeaf = 0, // Traverse the leaf, and for arrays visit both the array's elements _and_ the array itself. @@ -248,17 +210,18 @@ enum class LeafTraversalMode { }; std::unique_ptr<sbe::EExpression> generateTraverseF( - std::unique_ptr<sbe::EExpression> inputVar, + EvalExpr inputExpr, boost::optional<sbe::value::SlotId> topLevelFieldSlot, const sbe::MatchPath& fp, FieldIndex level, sbe::value::FrameIdGenerator* frameIdGenerator, - const MakePredicateExprFn& makePredicateExpr, + optimizer::SlotVarMap& slotVarMap, + const MakePredicateFn& makePredicate, bool matchesNothing, LeafTraversalMode mode) { tassert(7097202, "Expected an input expression or top level field", - inputVar.get() || topLevelFieldSlot.has_value()); + !inputExpr.isNull() || topLevelFieldSlot.has_value()); // If 'level' is currently pointing to the second last part of the field path AND the last // part of the field path is "", then 'childIsLeafWithEmptyName' will be true. Otherwise it @@ -271,11 +234,11 @@ std::unique_ptr<sbe::EExpression> generateTraverseF( const bool needsNothingCheck = !isLeafField && matchesNothing; auto lambdaFrameId = frameIdGenerator->generate(); - auto lambdaParam = sbe::EVariable{lambdaFrameId, 0}; + auto lambdaParam = EvalExpr{makeVariable(lambdaFrameId, 0)}; auto fieldExpr = topLevelFieldSlot ? makeVariable(*topLevelFieldSlot) - : makeFunction("getField", inputVar->clone(), makeConstant(fp.getPart(level))); + : makeFunction("getField", inputExpr.getExpr(slotVarMap), makeConstant(fp.getPart(level))); if (childIsLeafWithEmptyName) { auto frameId = frameIdGenerator->generate(); @@ -289,13 +252,14 @@ std::unique_ptr<sbe::EExpression> generateTraverseF( frameId, sbe::makeEs(std::move(fieldExpr)), std::move(expr)); } - auto resultExpr = isLeafField ? makePredicateExpr(lambdaParam) + auto resultExpr = isLeafField ? makePredicate(lambdaParam.clone()) : generateTraverseF(lambdaParam.clone(), boost::none /* topLevelFieldSlot */, fp, level + 1, frameIdGenerator, - makePredicateExpr, + slotVarMap, + makePredicate, matchesNothing, mode); @@ -310,7 +274,7 @@ std::unique_ptr<sbe::EExpression> generateTraverseF( // effectively allows us to skip over cases where we would be calling getField() on a scalar // value or an array and getting back Nothing. The subset of such cases where we should // return true is handled by the previous level before execution would reach here. - auto cond = makeFillEmptyFalse(makeFunction("isObject", lambdaParam.clone())); + auto cond = makeFillEmptyFalse(makeFunction("isObject", lambdaParam.getExpr(slotVarMap))); resultExpr = sbe::makeE<sbe::EIf>(std::move(cond), std::move(resultExpr), @@ -349,8 +313,9 @@ std::unique_ptr<sbe::EExpression> generateTraverseF( sbe::value::bitcastFrom<int64_t>(getBSONTypeMask(BSONType::Array) | getBSONTypeMask(BSONType::Object))))), std::move(traverseFExpr), - inputVar ? makeNot(makeFillEmptyFalse(makeFunction("isArray", inputVar->clone()))) - : makeConstant(sbe::value::TypeTags::Boolean, true)); + !inputExpr.isNull() ? makeNot(makeFillEmptyFalse( + makeFunction("isArray", inputExpr.getExpr(slotVarMap)))) + : makeConstant(sbe::value::TypeTags::Boolean, true)); } if (frameId) { @@ -362,326 +327,6 @@ std::unique_ptr<sbe::EExpression> generateTraverseF( } /** - * This function generates a path traversal plan stage at the given nested 'level' of the traversal - * path. For example, for a dotted path expression {'a.b': 2}, the traversal sub-tree built with - * 'BooleanStateHelper' will look like this: - * - * traverse - * outputSlot1 // the traversal result - * innerSlot1 // the result coming from the 'in' branch - * fieldSlot1 // field 'a' projected in the 'from' branch, this is the field we will be - * // traversing - * {outputSlot1 || innerSlot1} // the folding expression - combining results for each - * // element - * {outputSlot1} // final (early out) expression - when we hit the 'true' value, we don't - * // have to traverse the whole array - * from - * project [fieldSlot1 = getField(inputSlot, "a")] // project field 'a' from the document - * // bound to 'inputSlot' - * <inputStage> // e.g. collection scan - * in - * project [innerSlot1 = // if getField(fieldSlot1,'b') - * fillEmpty(outputSlot2, false) || // returns an array, compare the - * (fillEmpty(isArray(fieldSlot2), false) && // array itself to 2 as well - * fillEmpty(fieldSlot2 == 2, false))] - * traverse // nested traversal - * outputSlot2 // the traversal result - * innerSlot2 // the result coming from the 'in' branch - * fieldSlot2 // field 'b' projected in the 'from' branch, this is the field we will be - * // traversing - * {outputSlot2 || innerSlot2} // the folding expression - * {outputSlot2} // final (early out) expression - * from - * project [fieldSlot2 = getField(fieldSlot1, "b")] // project field 'b' from the - * // document bound to 'fieldSlot1', - * // which is field 'a' - * limit 1 - * coscan - * in - * project [innerSlot2 = // compare the field 'b' to 2 and - * fillEmpty(fieldSlot2 == 2, false)] // store the result in innerSlot2 - * limit 1 - * coscan - */ -EvalExprStagePair generatePathTraversal(EvalStage inputStage, - boost::optional<sbe::value::SlotId> inputDocumentSlot, - boost::optional<sbe::value::SlotId> topLevelFieldSlot, - const sbe::MatchPath& fp, - FieldIndex level, - PlanNodeId planNodeId, - sbe::value::SlotIdGenerator* slotIdGenerator, - sbe::value::FrameIdGenerator* frameIdGenerator, - optimizer::SlotVarMap& varSlotMap, - const MakePredicateFn& makePredicate, - LeafTraversalMode mode, - const FilterStateHelper& stateHelper) { - using namespace std::literals; - - invariant(level < fp.numParts()); - - tassert(7097203, - "Expected an input slot or top level field", - inputDocumentSlot.has_value() || topLevelFieldSlot.has_value()); - - // If 'level' is currently pointing to the second last part of the field path AND the last - // part of the field path is "", then 'childIsLeafWithEmptyName' will be true. Otherwise it - // will be false. - const bool childIsLeafWithEmptyName = - (level == fp.numParts() - 2u) && fp.isPathComponentEmpty(level + 1); - - const bool isLeafField = (level == fp.numParts() - 1u) || childIsLeafWithEmptyName; - const bool needsArrayCheck = isLeafField && mode == LeafTraversalMode::kArrayAndItsElements; - - // Generate the projection stage to read a sub-field at the current nested level and bind it - // to 'inputSlot'. - auto fieldName = fp.getPart(level); - auto inputSlot = slotIdGenerator->generate(); - - auto fromExpr = topLevelFieldSlot - ? makeVariable(*topLevelFieldSlot) - : makeFunction("getField", makeVariable(*inputDocumentSlot), makeConstant(fieldName)); - - if (childIsLeafWithEmptyName) { - auto frameId = frameIdGenerator->generate(); - sbe::EVariable getFieldValue(frameId, 0); - auto expr = sbe::makeE<sbe::EIf>( - makeFunction("isArray", getFieldValue.clone()), - getFieldValue.clone(), - makeFunction("getField", getFieldValue.clone(), makeConstant(""_sd))); - - fromExpr = - sbe::makeE<sbe::ELocalBind>(frameId, sbe::makeEs(std::move(fromExpr)), std::move(expr)); - } - - auto fromBranch = - makeProject(std::move(inputStage), planNodeId, inputSlot, std::move(fromExpr)); - - if (isLeafField && mode == LeafTraversalMode::kDoNotTraverseLeaf) { - // 'makePredicate' in this mode must return valid state, not just plain boolean value. So - // there is no need to wrap it in '_context->stateHelper.makePredicateCombinator'. - return makePredicate(inputSlot, std::move(fromBranch)); - } - - // Input slot for the inner branch of traverse stage is the same as the input slot holding the - // array. - auto innerInputSlot = inputSlot; - auto traverseInputSlot = inputSlot; - - // Some of MQL expressions need to check predicate not only for each of the array elements, but - // also for the whole array. Predicate tree is located in the inner branch of the traverse stage - // created below. To avoid generating predicate tree two times, we force traverse to be executed - // two times: first to iterate array elements and second to run the predicate tree against whole - // array. - // To achive this, we create union stage in the 'from' branch of traverse. This union stage - // sets the input slot of the traverse stage - 'traverseInputSlot'. Union returns ADVANCED - // two times, forcing traverse to be executed two times with different inputs: - // - First time union returns ADVANCED, 'traverseInputSlot' is set to the input array, stored - // in 'inputSlot'. Traverse stage iterates over array elements (if any) and checks the - // predicate for each of them. - // - Second time union returns ADVANCED, 'traverseInputSlot' is set to Nothing. In this case, - // traverse stage executes predicate only once. - // Since 'from' branch of traverse has union stage, we save current 'fromBranch' to use for - // loop join stage later. - EvalStage innerBranch; - EvalStage loopJoinFromBranch; - if (needsArrayCheck) { - loopJoinFromBranch = std::move(fromBranch); - - auto buildUnionBranch = [&](std::unique_ptr<sbe::EExpression> arrayExpr) { - auto currentArraySlot = slotIdGenerator->generate(); - auto branch = makeProject({}, planNodeId, currentArraySlot, std::move(arrayExpr)); - return std::make_pair(sbe::makeSV(currentArraySlot), std::move(branch)); - }; - - auto [checkArrayElementsSlots, checkArrayElementsStage] = - buildUnionBranch(makeVariable(inputSlot)); - - auto [checkWholeArraySlots, checkWholeArrayStage] = - buildUnionBranch(makeConstant(sbe::value::TypeTags::Nothing, 0)); - - traverseInputSlot = slotIdGenerator->generate(); - fromBranch = makeUnion( - makeVector(std::move(checkArrayElementsStage), std::move(checkWholeArrayStage)), - makeVector(std::move(checkArrayElementsSlots), std::move(checkWholeArraySlots)), - sbe::makeSV(traverseInputSlot), - planNodeId); - } - - boost::optional<sbe::value::SlotId> isTraverseInputArraySlot; - if (needsArrayCheck || !isLeafField || stateHelper.stateContainsValue()) { - isTraverseInputArraySlot = slotIdGenerator->generate(); - fromBranch = makeProject( - std::move(fromBranch), - planNodeId, - *isTraverseInputArraySlot, - makeFillEmptyFalse(makeFunction("isArray", makeVariable(traverseInputSlot)))); - } - - // If current input to the traverse stage is an array, this means that we are currently - // checking the predicate against each of the array elements. 'traverseInputSlot', holding - // current array element, should be passed to the predicate. - // If current input to the traverse stage is not an array, this could mean two things: - // - Value in the 'inputSlot' is not the array - // - We are checking the predicate against the whole array - // In both cases, 'inputSlot' should be passed to the predicate. - if (needsArrayCheck) { - innerInputSlot = slotIdGenerator->generate(); - innerBranch = makeProject(std::move(innerBranch), - planNodeId, - innerInputSlot, - sbe::makeE<sbe::EIf>(makeVariable(*isTraverseInputArraySlot), - makeVariable(traverseInputSlot), - makeVariable(inputSlot))); - } - - // For the non leaf nodes we insert a filter that allows the nested getField only for objects. - // But only if the outer value is an array. This is relevant in this example: given 2 documents - // {a:10} and {a:[10]} the filer {'a.b':null} returns the first document but not the second. - // Without the filter we'd try to traverse 'a', and in both cases the inner side of the - // 'traverse' would get the value '10'. However, in the first case we'd try to apply getField() - // to a standalone scalar, which would return a missing field, which is equal to null, whilst in - // a second case to a scalar which is an array element. According to the legacy implementation, - // this is not allowed and we shouldn't try to do a nesting path traversal of the array - // elements, unless an element is an object. - if (!isLeafField) { - innerBranch = - makeFilter<false>(std::move(innerBranch), - makeBinaryOp(sbe::EPrimBinary::logicOr, - makeNot(makeVariable(*isTraverseInputArraySlot)), - makeFunction("isObject", makeVariable(innerInputSlot))), - planNodeId); - } - - // Generate the 'in' branch for the TraverseStage that we're about to construct. - EvalExpr innerExpr; - std::tie(innerExpr, innerBranch) = isLeafField - // Base case: Evaluate the predicate. Predicate returns boolean value, we need to convert it - // to state using 'stateHelper.makePredicateCombinator'. - ? stateHelper.makePredicateCombinator(makePredicate(innerInputSlot, std::move(innerBranch)), - varSlotMap) - // Recursive case. - : generatePathTraversal(std::move(innerBranch), - innerInputSlot, - boost::none /* topLevelFieldSlot */, - fp, - level + 1, - planNodeId, - slotIdGenerator, - frameIdGenerator, - varSlotMap, - makePredicate, - mode, - stateHelper); - - if (stateHelper.stateContainsValue()) { - // The expression below checks if input is an array. In this case it returns initial state. - // This value will be the first one to be stored in 'traverseOutputSlot'. On the subsequent - // iterations 'traverseOutputSlot' is updated according to fold expression. - // If input is not array, expression below simply assigns state from the predicate to the - // 'innerResultSlot'. - // If state does not containy any value apart from boolean, we do not need to perform this - // check. - innerExpr = - makeLocalBind(frameIdGenerator, - [&](sbe::EVariable state) { - return sbe::makeE<sbe::EIf>( - makeVariable(*isTraverseInputArraySlot), - stateHelper.makeInitialState(stateHelper.getBool(state.clone())), - state.clone()); - }, - innerExpr.extractExpr(varSlotMap)); - } - - sbe::value::SlotId innerResultSlot; - std::tie(innerResultSlot, innerBranch) = - projectEvalExpr(std::move(innerExpr), - std::move(innerBranch), // NOLINT(bugprone-use-after-move) - planNodeId, - slotIdGenerator, - varSlotMap); - - // Generate the traverse stage for the current nested level. There are several cases covered - // during this phase: - // 1. If input is not an array, value from 'in' branch is returned (see comment for the 'in' - // branch construction). - // 2. If input is an array of size 1, fold expression is never executed. 'in' branch returns - // initial state, paired with false value if predicate evaluates to false and true value - // otherwise. - // 3. If input is an array of size larger than 1 and predicate does not evaluate to true on the - // first array element, fold expression is executed at least once. See comments for - // respective implementation of 'FilterStateHelper::makeTraverseCombinator' for details. - auto traverseOutputSlot = slotIdGenerator->generate(); - auto outputStage = stateHelper.makeTraverseCombinator( - std::move(fromBranch), - std::move(innerBranch), // NOLINT(bugprone-use-after-move) - traverseInputSlot, - traverseOutputSlot, - innerResultSlot, - planNodeId, - frameIdGenerator); - - // If the traverse stage's input was Nothing, or if the traverse stage's inner branch wasn't - // executed at all (because the input was an empty array), then 'traverseOutputSlot' will - // contain Nothing. In this case we haven't found matching element, so convert Nothing to false. - auto resultExpr = makeBinaryOp(sbe::EPrimBinary::fillEmpty, - makeVariable(traverseOutputSlot), - stateHelper.makeState(false)); - - if (!needsArrayCheck) { - return {std::move(resultExpr), std::move(outputStage)}; - } - - auto outputSlot = slotIdGenerator->generate(); - outputStage = - makeProject(std::move(outputStage), planNodeId, outputSlot, std::move(resultExpr)); - - // In case predicate needs to be checked both for each of the array elements and for whole - // array, traverse stage created above will return ADVANCED two times. To handle that, we - // construct the following tree: - // - // nlj - // left - // <'inputStage' and extracting current field value into 'inputSlot'> - // right - // limit 1 - // filter {!isTraverseInputArraySlot || outputSlot} - // <traverse stage created above> - // - // Let iterate over each part of the tree: - // - Loop join stage is created to hold all stages which usually go into the 'from' branch of - // traverse stage. This includes 'inputStage' and project stage to extract current field - // value. - // - Filter stage ensures that tree below it returns ADVANCED only if the predicate matched - // one of the array elements or the whole array. - // - Limit-1 stage ensures short-circuiting. If one of the array elements matched the - // predicate, filter stage below it returns ADVANCED and we do not execute the predicate - // for the whole array. - // - // To better understand the predicate of the filter stage, let us take a look how the resulting - // tree behaves for various 'inputSlot' values. 'inputSlot' can be: - // - Array. In this case traverse stage will be executed twice: - // 1. 'isTraverseInputArraySlot = true', filter will pass only if 'outputSlot = true', which - // means predicate returned true for one of the array elements. - // 2. 'isTraverseInputArray = false' (since second time traverse input is Nothing), filter - // will always pass. Even though predicate may not match the whole array, we need to return - // something to the stage above us. - // - Not array. In this case traverse stage will be executed once: - // 1. 'isTraverseInputArray = false', filter will always pass. - // 2. Will never happen because of limit-1 stage on top. - outputStage = makeFilter<false>(std::move(outputStage), - makeBinaryOp(sbe::EPrimBinary::logicOr, - makeNot(makeVariable(*isTraverseInputArraySlot)), - stateHelper.getBool(outputSlot)), - planNodeId); - - outputStage = makeLimitSkip(std::move(outputStage), planNodeId, 1); - - outputStage = makeLoopJoin(std::move(loopJoinFromBranch), std::move(outputStage), planNodeId); - - return {outputSlot, std::move(outputStage)}; -} - -/** * Given a field path 'path' and a predicate 'makePredicate', this function generates an SBE tree * that will evaluate the predicate on the field path. When 'path' is not empty string (""), this * function generates a sequence of nested traverse operators to traverse the field path and it uses @@ -689,130 +334,66 @@ EvalExprStagePair generatePathTraversal(EvalStage inputStage, * When 'path' is empty, this function simply uses 'makePredicate' to generate an SBE expression for * evaluating the predicate on a single value. */ -void generatePredicateImpl(MatchExpressionVisitorContext* context, - const sbe::MatchPath& path, - const MakePredicateExprFn& makePredicateExpr, - const MakePredicateFn& makePredicate, - LeafTraversalMode mode, - bool useCombinator = true, - bool matchesNothing = false) { - auto& frame = context->evalStack.topFrame(); - - auto&& [expr, stage] = [&]() { - if (frame.data().childOfElemMatchValue) { - tassert(7097204, - "Expected input slot or key slots to be defined", - frame.data().inputSlot.has_value()); - - // If matchExpr's parent is a ElemMatchValueMatchExpression, then we should just - // apply the predicate directly on 'inputSlot'. 'inputSlot' will be a "correlated - // slot" that holds the value of the ElemMatchValueMatchExpression's field path. - auto result = makePredicate(*frame.data().inputSlot, frame.extractStage()); - if (useCombinator) { - return context->stateHelper.makePredicateCombinator(std::move(result), - context->state.slotVarMap); - } - return result; - } +void generatePredicate(MatchExpressionVisitorContext* context, + const sbe::MatchPath& path, + const MakePredicateFn& makePredicate, + LeafTraversalMode mode, + bool matchesNothing = false) { + auto& frame = context->topFrame(); - const bool isFieldPathOnRootDoc = - (!context->inputSlot || *context->inputSlot == *frame.data().inputSlot); - - boost::optional<sbe::value::SlotId> topLevelFieldSlot; - if (isFieldPathOnRootDoc && context->slots) { - // If we are generating a filter over an index scan, search for a kField slot that - // corresponds to the full path 'path'. - if (context->isFilterOverIxscan && !path.empty()) { - auto name = std::make_pair(PlanStageSlots::kField, path.dottedField()); - if (auto slot = context->slots->getIfExists(name); slot) { - // We found a kField slot that matches. We don't need to perform any traversal; - // we can just evaluate the predicate on the slot directly and return. - auto result = makePredicate(*slot, frame.extractStage()); - if (useCombinator) { - return context->stateHelper.makePredicateCombinator( - std::move(result), context->state.slotVarMap); - } - return result; - } - } + if (frame.childOfElemMatchValue) { + tassert(7097204, "Expected input expr to be defined", !frame.inputExpr.isNull()); - // Search for a kField slot whose path matches the first part of 'path'. - topLevelFieldSlot = context->slots->getIfExists( - std::make_pair(PlanStageSlots::kField, path.getPart(0))); - } + // If matchExpr's parent is a ElemMatchValueMatchExpression, then we should just + // apply the predicate directly on 'inputExpr'. 'inputExpr' will be a lambda + // parameter that holds the value of the ElemMatchValueMatchExpression's field path. + frame.pushExpr(makePredicate(frame.inputExpr.clone())); + return; + } - tassert(7097205, - "Expected either input slot or top-level field slot to be defined", - frame.data().inputSlot.has_value() || topLevelFieldSlot.has_value()); - - // Using traverseF() and lambdas performs better than using TraverseStage, so we prefer - // to use traverseF()/lambdas where possible. We currently support traverseF()/lambdas - // when the caller provides a non-null 'makePredicateExpr' and when 'stateHelper' does - // not contain a value. - if (makePredicateExpr != nullptr && !context->stateHelper.stateContainsValue()) { - auto result = - generateTraverseF(frame.data().inputSlot ? makeVariable(*frame.data().inputSlot) - : std::unique_ptr<sbe::EExpression>{}, - topLevelFieldSlot, - path, - 0, /* level */ - context->state.frameIdGenerator, - makePredicateExpr, - matchesNothing, - mode); - - return EvalExprStagePair{std::move(result), frame.extractStage()}; + const bool isFieldPathOnRootDoc = context->framesCount() == 1; + auto* slots = context->slots; + + boost::optional<sbe::value::SlotId> topLevelFieldSlot; + if (isFieldPathOnRootDoc && slots) { + // If we are generating a filter over an index scan, search for a kField slot that + // corresponds to the full path 'path'. + if (context->isFilterOverIxscan && !path.empty()) { + auto name = std::make_pair(PlanStageSlots::kField, path.dottedField()); + if (auto slot = slots->getIfExists(name); slot) { + // We found a kField slot that matches. We don't need to perform any traversal; + // we can just evaluate the predicate on the slot directly and return. + frame.pushExpr(makePredicate(*slot)); + return; + } } - return generatePathTraversal(frame.extractStage(), - frame.data().inputSlot, + // Search for a kField slot whose path matches the first part of 'path'. + topLevelFieldSlot = + slots->getIfExists(std::make_pair(PlanStageSlots::kField, path.getPart(0))); + } + + tassert(7097205, + "Expected either input expr or top-level field slot to be defined", + !frame.inputExpr.isNull() || topLevelFieldSlot.has_value()); + + frame.pushExpr(generateTraverseF(frame.inputExpr.clone(), topLevelFieldSlot, path, 0, /* level */ - context->planNodeId, - context->state.slotIdGenerator, context->state.frameIdGenerator, context->state.slotVarMap, makePredicate, - mode, - context->stateHelper); - }(); - - frame.setStage(std::move(stage)); - frame.pushExpr(std::move(expr)); -} - -void generatePredicate(MatchExpressionVisitorContext* context, - const sbe::MatchPath& path, - const MakePredicateFn& makePredicate, - LeafTraversalMode mode, - bool useCombinator = true, - bool matchesNothing = false) { - generatePredicateImpl( - context, path, nullptr, makePredicate, mode, useCombinator, matchesNothing); -} - -void generatePredicateExpr(MatchExpressionVisitorContext* context, - const sbe::MatchPath& path, - const MakePredicateExprFn& makePredicateExpr, - LeafTraversalMode mode, - bool useCombinator = true, - bool matchesNothing = false) { - auto makePredicate = [&](sbe::value::SlotId inputSlot, - EvalStage inputStage) -> EvalExprStagePair { - return {makePredicateExpr(sbe::EVariable(inputSlot)), std::move(inputStage)}; - }; - - generatePredicateImpl( - context, path, makePredicateExpr, makePredicate, mode, useCombinator, matchesNothing); + matchesNothing, + mode)); } /** * Generates and pushes a constant boolean expression for either alwaysTrue or alwaysFalse. */ void generateAlwaysBoolean(MatchExpressionVisitorContext* context, bool value) { - auto& frame = context->evalStack.topFrame(); - frame.pushExpr(context->stateHelper.makeState(value)); + auto& frame = context->topFrame(); + frame.pushExpr(makeConstant(sbe::value::TypeTags::Boolean, value)); } /** @@ -837,15 +418,17 @@ void generateArraySize(MatchExpressionVisitorContext* context, return; } - auto makePredicateExpr = [&](const sbe::EVariable& var) { + auto makePredicate = [&](EvalExpr inputExpr) { auto sizeExpr = inputParamSlotId ? makeVariable(*inputParamSlotId) : makeConstant(sbe::value::TypeTags::NumberInt32, size); return makeFillEmptyFalse(makeBinaryOp( - sbe::EPrimBinary::eq, makeFunction("getArraySize", var.clone()), std::move(sizeExpr))); + sbe::EPrimBinary::eq, + makeFunction("getArraySize", inputExpr.extractExpr(context->state.slotVarMap)), + std::move(sizeExpr))); }; - generatePredicateExpr( - context, *matchExpr->fieldRef(), makePredicateExpr, LeafTraversalMode::kDoNotTraverseLeaf); + const auto traversalMode = LeafTraversalMode::kDoNotTraverseLeaf; + generatePredicate(context, *matchExpr->fieldRef(), makePredicate, traversalMode); } /** @@ -855,9 +438,8 @@ void generateArraySize(MatchExpressionVisitorContext* context, void generateComparison(MatchExpressionVisitorContext* context, const ComparisonMatchExpression* expr, sbe::EPrimBinary::Op binaryOp) { - auto makePredicateExpr = - [context, expr, binaryOp](const sbe::EVariable& var) -> std::unique_ptr<sbe::EExpression> { - return generateComparisonExpr(context->state, expr, binaryOp, var) + auto makePredicate = [context, expr, binaryOp](EvalExpr inputExpr) { + return generateComparisonExpr(context->state, expr, binaryOp, std::move(inputExpr)) .extractExpr(context->state.slotVarMap); }; @@ -885,8 +467,7 @@ void generateComparison(MatchExpressionVisitorContext* context, matchesNothing = true; } - generatePredicateExpr( - context, *expr->fieldRef(), makePredicateExpr, traversalMode, true, matchesNothing); + generatePredicate(context, *expr->fieldRef(), makePredicate, traversalMode, matchesNothing); } /** @@ -897,29 +478,13 @@ void generateComparison(MatchExpressionVisitorContext* context, void generateBitTest(MatchExpressionVisitorContext* context, const BitTestMatchExpression* expr, const sbe::BitTestBehavior& bitOp) { - auto makePredicateExpr = - [context, expr, bitOp](const sbe::EVariable& var) -> std::unique_ptr<sbe::EExpression> { - return generateBitTestExpr(context->state, expr, bitOp, var) + auto makePredicate = [context, expr, bitOp](EvalExpr inputExpr) { + return generateBitTestExpr(context->state, expr, bitOp, std::move(inputExpr)) .extractExpr(context->state.slotVarMap); }; - generatePredicateExpr( - context, *expr->fieldRef(), makePredicateExpr, LeafTraversalMode::kArrayElementsOnly); -} - -// Each logical expression child is evaluated in a separate EvalFrame. Set up a new EvalFrame with a -// limit-1/coscan tree. -void pushFrameForLogicalExpressionChild(MatchExpressionVisitorContext* context, - size_t numChildren) { - if (numChildren <= 1) { - // For logical expressions with no children, we return constant (handled in the - // post-visitor). For expressions with 1 child, we evaluate the child within the current - // EvalFrame. - return; - } - - const auto& frame = context->evalStack.topFrame(); - context->evalStack.emplaceFrame(EvalStage{}, frame.data().inputSlot); + const auto traversalMode = LeafTraversalMode::kArrayElementsOnly; + generatePredicate(context, *expr->fieldRef(), makePredicate, traversalMode); } // Build specified logical expression with branches stored on stack. @@ -927,96 +492,26 @@ void buildLogicalExpression(sbe::EPrimBinary::Op op, size_t numChildren, MatchExpressionVisitorContext* context) { if (numChildren == 0) { - // If logical expression does not have any children, constant is returned. + // If an $and or $or expression does not have any children, a constant is returned. generateAlwaysBoolean(context, op == sbe::EPrimBinary::logicAnd); return; } else if (numChildren == 1) { - // For expressions with 1 child, do nothing and return. The post-visitor for the child - // expression has already done all the necessary work. + // For $and or $or expressions with 1 child, do nothing and return. The post-visitor for + // the child expression has already done all the necessary work. return; } - // Move the children's outputs off of the evalStack into a vector in preparation for - // calling generateShortCircuitingLogicalOp(). - std::vector<EvalExprStagePair> branches; - for (size_t i = 0; i < numChildren; ++i) { - auto [expr, stage] = context->evalStack.popFrame(); - branches.emplace_back(std::move(expr), std::move(stage)); - } - std::reverse(branches.begin(), branches.end()); - - auto& frame = context->evalStack.topFrame(); - auto&& [expr, opStage] = generateShortCircuitingLogicalOp(op, - std::move(branches), - context->planNodeId, - context->state.slotIdGenerator, - context->state.slotVarMap, - context->stateHelper); - frame.pushExpr(std::move(expr)); - - // Join frame.stage with opStage. - frame.setStage(makeLoopJoin(frame.extractStage(), std::move(opStage), context->planNodeId)); -} - -/** - * Helper to use for 'makePredicate' argument of 'generatePredicate' function for $elemMatch - * expressions. - */ -EvalExprStagePair elemMatchMakePredicate(MatchExpressionVisitorContext* context, - sbe::value::SlotId filterSlot, - EvalStage& filterStage, - sbe::value::SlotId childInputSlot, - sbe::value::SlotId inputSlot, - EvalStage inputStage) { - // The 'filterStage' subtree was generated to read from 'childInputSlot', based on - // the assumption that 'childInputSlot' is some correlated slot that will be made - // available by childStages's parent. We add a projection here to 'inputStage' to - // feed 'inputSlot' into 'childInputSlot'. - auto isInputArray = context->state.slotId(); - auto fromBranch = makeProject(std::move(inputStage), - context->planNodeId, - childInputSlot, - sbe::makeE<sbe::EVariable>(inputSlot), - isInputArray, - makeFunction("isArray", sbe::makeE<sbe::EVariable>(inputSlot))); - - auto [innerResultSlot, innerBranch] = [&]() -> std::pair<sbe::value::SlotId, EvalStage> { - if (!context->stateHelper.stateContainsValue()) { - return {filterSlot, std::move(filterStage)}; - } + auto& frame = context->topFrame(); - auto resultSlot = context->state.slotId(); - return {resultSlot, - makeProject(std::move(filterStage), - context->planNodeId, - resultSlot, - context->stateHelper.makeInitialState( - context->stateHelper.getBool(filterSlot)))}; - }(); + // Move the children's outputs off of the matchStack into a vector in preparation for + // calling makeBalancedBooleanOpTree(). + std::vector<EvalExpr> exprs; + for (size_t i = 0; i < numChildren; ++i) { + exprs.emplace_back(frame.popEvalExpr()); + } + std::reverse(exprs.begin(), exprs.end()); - innerBranch = makeFilter<true>( - std::move(innerBranch), sbe::makeE<sbe::EVariable>(isInputArray), context->planNodeId); - - // Generate the traverse. - auto traverseSlot = context->state.slotId(); - auto traverseStage = context->stateHelper.makeTraverseCombinator( - std::move(fromBranch), - std::move(innerBranch), // NOLINT(bugprone-use-after-move) - childInputSlot, - traverseSlot, - innerResultSlot, - context->planNodeId, - context->state.frameIdGenerator); - - // There are some cases where 'traverseOutputSlot' gets set to Nothing when TraverseStage - // doesn't match anything. One example of when this happens is when innerBranch->getNext() - // returns EOF every time it is called by TraverseStage. In these cases $elemMatch should return - // false instead of Nothing. - auto projectExpr = makeBinaryOp(sbe::EPrimBinary::fillEmpty, - sbe::makeE<sbe::EVariable>(traverseSlot), - context->stateHelper.makeState(false)); - - return {std::move(projectExpr), std::move(traverseStage)}; + frame.pushExpr(makeBalancedBooleanOpTree(op, std::move(exprs), context->state.slotVarMap)); } /** @@ -1029,55 +524,33 @@ public: void visit(const AlwaysFalseMatchExpression* expr) final {} void visit(const AlwaysTrueMatchExpression* expr) final {} - - void visit(const AndMatchExpression* expr) final { - if (expr == _context->topLevelAnd) { - // Usually, we implement AND expression using limit-1/union tree. Each branch of a union - // stage represents AND's argument. For top-level AND we apply an optimization that - // allows us to get rid of limit-1/union tree. - // Firstly, we add filter stage on top of tree for each of AND's arguments. This ensures - // that respective tree does not return ADVANCED if argument evaluates to false. - // Secondly, we place trees of AND's arguments on top of each other. This guarantees - // that the whole resulting tree for AND does not return ADVANCED if one of arguments - // did not returned ADVANCED (e.g. evaluated to false). - // First step is performed in 'MatchExpressionInVisitor' and - // 'MatchExpressionPostVisitor'. Second step is achieved by evaluating each child within - // one EvalFrame, so that each child builds directly on top of - // '_context->evalStack.topFrame().extractStage()'. - return; - } - - // For non-top-level $and's, we evaluate each child in its own EvalFrame. - pushFrameForLogicalExpressionChild(_context, expr->numChildren()); - } - + void visit(const AndMatchExpression* expr) final {} void visit(const BitsAllClearMatchExpression* expr) final {} void visit(const BitsAllSetMatchExpression* expr) final {} void visit(const BitsAnyClearMatchExpression* expr) final {} void visit(const BitsAnySetMatchExpression* expr) final {} void visit(const ElemMatchObjectMatchExpression* matchExpr) final { - // ElemMatchObjectMatchExpression is guaranteed to always have exactly 1 child - invariant(matchExpr->numChildren() == 1); + auto numChildren = matchExpr->numChildren(); + tassert(6987603, "Expected ElemMatchObject to have exactly 1 child", numChildren == 1); - // We evaluate $elemMatch's child in a new EvalFrame. For the child's EvalFrame, we set the - // 'stage' field to be a null tree, and we set the 'inputSlot' field to be a newly allocated - // slot (childInputSlot). childInputSlot is a "correlated slot" that will be set up later - // (handled in the post-visitor). - auto childInputSlot = _context->state.slotId(); - _context->evalStack.emplaceFrame(EvalStage{}, childInputSlot); + // We evaluate $elemMatch's child in a new MatchFrame. For the child's MatchFrame, we set + // the 'inputExpr' field to be the lambda's parameter (lambdaParam). + auto lambdaFrameId = _context->state.frameId(); + auto lambdaParam = makeVariable(lambdaFrameId, 0); + _context->emplaceFrame(std::move(lambdaParam), lambdaFrameId); } void visit(const ElemMatchValueMatchExpression* matchExpr) final { - invariant(matchExpr->numChildren() >= 1); + auto numChildren = matchExpr->numChildren(); + tassert(6987604, "Expected ElemMatchValue to have at least 1 child", numChildren >= 1); - // We evaluate each child in its own EvalFrame. Set up a new EvalFrame with a null tree - // for the first child. For all of the children's EvalFrames, we set the 'inputSlot' field - // to 'childInputSlot'. childInputSlot is a "correlated slot" that will be set up later in - // the post-visitor (childInputSlot will be the correlated parameter of a TraverseStage). - auto childInputSlot = _context->state.slotId(); + // We create a new MatchFrame for evaluating $elemMatch's children. For this new MatchFrame, + // we set the 'inputExpr' field to be the lambda's parameter (lambdaParam). + auto lambdaFrameId = _context->state.frameId(); + auto lambdaParam = makeVariable(lambdaFrameId, 0); bool childOfElemMatchValue = true; - _context->evalStack.emplaceFrame(EvalStage{}, childInputSlot, childOfElemMatchValue); + _context->emplaceFrame(std::move(lambdaParam), lambdaFrameId, childOfElemMatchValue); } void visit(const EqualityMatchExpression* expr) final {} @@ -1161,18 +634,13 @@ public: void visit(const LTEMatchExpression* expr) final {} void visit(const LTMatchExpression* expr) final {} void visit(const ModMatchExpression* expr) final {} - void visit(const NorMatchExpression* expr) final { - pushFrameForLogicalExpressionChild(_context, expr->numChildren()); - } + void visit(const NorMatchExpression* expr) final {} void visit(const NotMatchExpression* expr) final { invariant(expr->numChildren() == 1); } - void visit(const OrMatchExpression* expr) final { - pushFrameForLogicalExpressionChild(_context, expr->numChildren()); - } - + void visit(const OrMatchExpression* expr) final {} void visit(const RegexMatchExpression* expr) final {} void visit(const SizeMatchExpression* expr) final {} @@ -1249,26 +717,6 @@ public: } void visit(const AndMatchExpression* expr) final { - if (expr == _context->topLevelAnd) { - // For a top-level $and with no children, do nothing and return. For top-level $and's - // with at least one, we evaluate each child within the current EvalFrame. - if (expr->numChildren() >= 1) { - // Process the output of the last child. - if (_context->stateHelper.stateContainsValue()) { - projectCurrentExprToOutputSlot(_context); - } - - auto& frame = _context->evalStack.topFrame(); - invariant(frame.exprsCount() > 0); - frame.setStage( - makeFilter<false>(frame.extractStage(), - _context->stateHelper.getBool( - frame.popExpr().extractExpr(_context->state.slotVarMap)), - _context->planNodeId)); - } - return; - } - buildLogicalExpression(sbe::EPrimBinary::logicAnd, expr->numChildren(), _context); } @@ -1290,110 +738,82 @@ public: void visit(const ElemMatchObjectMatchExpression* matchExpr) final { using namespace std::placeholders; - // ElemMatchObjectMatchExpression is guaranteed to always have exactly 1 child - invariant(matchExpr->numChildren() == 1); - - // Extract the input slot, the output, and the stage from of the child's EvalFrame, and - // remove the child's EvalFrame from the stack. - tassert(5273405, - "Eval frame's input slot is not defined", - _context->evalStack.topFrame().data().inputSlot); - auto childInputSlot = *_context->evalStack.topFrame().data().inputSlot; - auto filterPair = [&]() { - auto [expr, stage] = _context->evalStack.popFrame(); - auto [predicateSlot, predicateStage] = projectEvalExpr(std::move(expr), - std::move(stage), - _context->planNodeId, - _context->state.slotIdGenerator, - _context->state.slotVarMap); - - auto isObjectOrArrayExpr = - makeBinaryOp(sbe::EPrimBinary::logicOr, - makeFunction("isObject", sbe::makeE<sbe::EVariable>(childInputSlot)), - makeFunction("isArray", sbe::makeE<sbe::EVariable>(childInputSlot))); - predicateStage = makeFilter<true>( - std::move(predicateStage), std::move(isObjectOrArrayExpr), _context->planNodeId); - return std::make_pair(predicateSlot, std::move(predicateStage)); - }(); - - // We're using 'kDoNotTraverseLeaf' traverse mode, so we're guaranteed that 'makePredicate' - // will only be called once, so it's safe to bind the reference to 'filterStage' subtree - // here. - auto makePredicate = - [this, filterSlot = filterPair.first, &filterStage = filterPair.second, childInputSlot]( - sbe::value::SlotId&& inputSlot, EvalStage&& inputStage) { - return elemMatchMakePredicate(_context, - filterSlot, - filterStage, - childInputSlot, - std::forward<sbe::value::SlotId>(inputSlot), - std::forward<EvalStage>(inputStage)); - }; + auto numChildren = matchExpr->numChildren(); + tassert(6987605, "Expected ElemMatchObject to have exactly 1 child", numChildren == 1); + tassert( + 6987606, "Expected frameId to be defined", _context->topFrame().frameId.has_value()); - // 'makePredicate' defined above returns a state instead of plain boolean value, so there is - // no need to use combinator for it. - generatePredicate(_context, - *matchExpr->fieldRef(), - makePredicate, - LeafTraversalMode::kDoNotTraverseLeaf, - false /* useCombinator */); + auto lambdaFrameId = *_context->topFrame().frameId; + auto lambdaParam = makeVariable(lambdaFrameId, 0); + + auto lambdaBodyExpr = makeBinaryOp( + sbe::EPrimBinary::logicAnd, + makeFunction( + "typeMatch", + std::move(lambdaParam), + makeConstant(sbe::value::TypeTags::NumberInt64, + sbe::value::bitcastFrom<int64_t>(getBSONTypeMask(BSONType::Array) | + getBSONTypeMask(BSONType::Object)))), + _context->topFrame().popExpr(_context->state.slotVarMap)); + + _context->popFrame(); + + auto lambdaExpr = sbe::makeE<sbe::ELocalLambda>(lambdaFrameId, std::move(lambdaBodyExpr)); + + auto makePredicate = [&](EvalExpr inputExpr) { + return makeBinaryOp(sbe::EPrimBinary::logicAnd, + makeFillEmptyFalse(makeFunction( + "isArray", inputExpr.getExpr(_context->state.slotVarMap))), + makeFillEmptyFalse(makeFunction( + "traverseF", + inputExpr.getExpr(_context->state.slotVarMap), + std::move(lambdaExpr), + makeConstant(sbe::value::TypeTags::Boolean, false)))); + }; + + const auto traversalMode = LeafTraversalMode::kDoNotTraverseLeaf; + generatePredicate(_context, *matchExpr->fieldRef(), makePredicate, traversalMode); } void visit(const ElemMatchValueMatchExpression* matchExpr) final { using namespace std::placeholders; auto numChildren = matchExpr->numChildren(); - invariant(numChildren >= 1); + tassert(6987607, "Expected ElemMatchValue to have at least 1 child", numChildren >= 1); + tassert( + 6987608, "Expected frameId to be defined", _context->topFrame().frameId.has_value()); - tassert(5273406, - "Eval frame's input slot is not defined", - _context->evalStack.topFrame().data().inputSlot); - auto childInputSlot = *_context->evalStack.topFrame().data().inputSlot; + auto lambdaFrameId = *_context->topFrame().frameId; + auto lambdaParam = makeVariable(lambdaFrameId, 0); - // Move the children's outputs off of the evalStack into a vector in preparation for - // calling generateShortCircuitingLogicalOp(). - std::vector<EvalExprStagePair> childStages; + // Move the children's outputs off of the expr stack into a vector in preparation for + // calling makeBalancedBooleanOpTree(). + std::vector<EvalExpr> exprs; for (size_t i = 0; i < numChildren; ++i) { - auto [expr, stage] = _context->evalStack.popFrame(); - childStages.emplace_back(std::move(expr), std::move(stage)); + exprs.emplace_back(_context->topFrame().popEvalExpr()); } - std::reverse(childStages.begin(), childStages.end()); - - auto [filterExpr, filterStage] = - generateShortCircuitingLogicalOp(sbe::EPrimBinary::logicAnd, - std::move(childStages), - _context->planNodeId, - _context->state.slotIdGenerator, - _context->state.slotVarMap, - _context->stateHelper); - - auto filterPair = projectEvalExpr(std::move(filterExpr), - std::move(filterStage), - _context->planNodeId, - _context->state.slotIdGenerator, - _context->state.slotVarMap); - - // We're using 'kDoNotTraverseLeaf' traverse mode, so we're guaranteed that 'makePredcate' - // will only be called once, so it's safe to bind the reference to 'filterStage' subtree - // here. - auto makePredicate = - [this, filterSlot = filterPair.first, &filterStage = filterPair.second, childInputSlot]( - sbe::value::SlotId&& inputSlot, EvalStage&& inputStage) { - return elemMatchMakePredicate(_context, - filterSlot, - filterStage, - childInputSlot, - std::forward<sbe::value::SlotId>(inputSlot), - std::forward<EvalStage>(inputStage)); - }; + std::reverse(exprs.begin(), exprs.end()); + + _context->popFrame(); + auto lambdaBodyExpr = makeBalancedBooleanOpTree( + sbe::EPrimBinary::logicAnd, std::move(exprs), _context->state.slotVarMap); + + auto lambdaExpr = sbe::makeE<sbe::ELocalLambda>( + lambdaFrameId, lambdaBodyExpr.extractExpr(_context->state.slotVarMap)); + + auto makePredicate = [&](EvalExpr inputExpr) { + return makeBinaryOp(sbe::EPrimBinary::logicAnd, + makeFillEmptyFalse(makeFunction( + "isArray", inputExpr.getExpr(_context->state.slotVarMap))), + makeFillEmptyFalse(makeFunction( + "traverseF", + inputExpr.getExpr(_context->state.slotVarMap), + std::move(lambdaExpr), + makeConstant(sbe::value::TypeTags::Boolean, false)))); + }; - // 'makePredicate' defined above returns a state instead of plain boolean value, so there is - // no need to use combinator for it. - generatePredicate(_context, - *matchExpr->fieldRef(), - makePredicate, - LeafTraversalMode::kDoNotTraverseLeaf, - false /* useCombinator */); + const auto traversalMode = LeafTraversalMode::kDoNotTraverseLeaf; + generatePredicate(_context, *matchExpr->fieldRef(), makePredicate, traversalMode); } void visit(const EqualityMatchExpression* expr) final { @@ -1401,49 +821,29 @@ public: } void visit(const ExistsMatchExpression* expr) final { - const auto traversalMode = LeafTraversalMode::kDoNotTraverseLeaf; - - auto makePredicateExpr = - [expr, - context = _context](const sbe::EVariable& var) -> std::unique_ptr<sbe::EExpression> { - auto resultExpr = sbe::makeE<sbe::EFunction>("exists", sbe::makeEs(var.clone())); - - // $exists is always applied to the leaf of the field path. For kDoNotTraverseLeaf mode, - // generatePredicateExpr() does not convert the predicate value to state when generating - // traversal for leaf nodes of field path. For this reason, we need to perform this - // conversion manually. - if (!expr->fieldRef()->empty() && context->evalStack.topFrame().data().inputSlot) { - resultExpr = context->stateHelper.makeState(std::move(resultExpr)); - } - - return resultExpr; + auto makePredicate = [this](EvalExpr inputExpr) { + return sbe::makeE<sbe::EFunction>( + "exists", sbe::makeEs(inputExpr.extractExpr(_context->state.slotVarMap))); }; - generatePredicateExpr(_context, *expr->fieldRef(), makePredicateExpr, traversalMode); + const auto traversalMode = LeafTraversalMode::kDoNotTraverseLeaf; + generatePredicate(_context, *expr->fieldRef(), makePredicate, traversalMode); } void visit(const ExprMatchExpression* matchExpr) final { - auto& frame = _context->evalStack.topFrame(); - - // The $expr expression must by applied to the current $$ROOT document, so make sure that - // an input slot associated with the current frame is the same slot as the input slot for - // the entire match expression we're translating - tassert(5273407, "Match expression's input slot is not defined", _context->inputSlot); - tassert(5273408, "Eval frame's input slot is not defined", frame.data().inputSlot); - tassert(5273409, - "Eval frame for $expr is not computed over expression's input slot", - *frame.data().inputSlot == *_context->inputSlot); + auto& frame = _context->topFrame(); + // The $expr expression is always applied to the current $$ROOT document. auto expr = generateExpression(_context->state, matchExpr->getExpression().get(), - *frame.data().inputSlot, + _context->inputExpr.clone(), _context->slots); // We need to convert the result of the '{$expr: ..}' expression to a boolean value. auto logicExpr = makeFillEmptyFalse( makeFunction("coerceToBool", expr.extractExpr(_context->state.slotVarMap))); - frame.pushExpr(_context->stateHelper.makeState(std::move(logicExpr))); + frame.pushExpr(std::move(logicExpr)); } void visit(const GTEMatchExpression* expr) final { @@ -1469,21 +869,21 @@ public: : LeafTraversalMode::kArrayElementsOnly; if (exprIsParameterized || expr->getRegexes().size() == 0) { - auto makePredicateExpr = [&, hasNull = hasNull](const sbe::EVariable& var) { + auto makePredicate = [&, hasNull = hasNull](EvalExpr inputExpr) { // We have to match nulls and undefined if a 'null' is present in // equalities. - auto inputExpr = !hasNull - ? var.clone() - : sbe::makeE<sbe::EIf>(generateNullOrMissing(var), - makeConstant(sbe::value::TypeTags::Null, 0), - var.clone()); + auto valueExpr = !hasNull + ? inputExpr.extractExpr(_context->state.slotVarMap) + : sbe::makeE<sbe::EIf>( + generateNullOrMissing(inputExpr.clone(), _context->state.slotVarMap), + makeConstant(sbe::value::TypeTags::Null, 0), + inputExpr.getExpr(_context->state.slotVarMap)); return makeIsMember( - std::move(inputExpr), std::move(equalitiesExpr), _context->state.data->env); + std::move(valueExpr), std::move(equalitiesExpr), _context->state.data->env); }; - generatePredicateExpr( - _context, *expr->fieldRef(), makePredicateExpr, traversalMode, true, hasNull); + generatePredicate(_context, *expr->fieldRef(), makePredicate, traversalMode, hasNull); return; } @@ -1522,34 +922,37 @@ public: auto regexSetConstant = sbe::makeE<sbe::EConstant>(regexSetTag, regexSetVal); regexArrSetGuard.reset(); - auto makePredicateExpr = [&, hasNull = hasNull](const sbe::EVariable& var) { + auto makePredicate = [&, hasNull = hasNull](EvalExpr inputExpr) { auto resultExpr = makeBinaryOp( sbe::EPrimBinary::logicOr, - makeFillEmptyFalse( - makeFunction("isMember", var.clone(), std::move(regexSetConstant))), - makeFillEmptyFalse( - makeFunction("regexMatch", std::move(pcreRegexesConstant), var.clone()))); + makeFillEmptyFalse(makeFunction("isMember", + inputExpr.getExpr(_context->state.slotVarMap), + std::move(regexSetConstant))), + makeFillEmptyFalse(makeFunction("regexMatch", + std::move(pcreRegexesConstant), + inputExpr.getExpr(_context->state.slotVarMap)))); if (expr->getEqualities().size() > 0) { // We have to match nulls and undefined if a 'null' is present in equalities. - auto inputExpr = !hasNull - ? var.clone() - : sbe::makeE<sbe::EIf>(generateNullOrMissing(var), - makeConstant(sbe::value::TypeTags::Null, 0), - var.clone()); - - resultExpr = makeBinaryOp(sbe::EPrimBinary::logicOr, - makeIsMember(std::move(inputExpr), - std::move(equalitiesExpr), - _context->state.data->env), - std::move(resultExpr)); + if (hasNull) { + inputExpr = sbe::makeE<sbe::EIf>( + generateNullOrMissing(inputExpr.clone(), _context->state.slotVarMap), + makeConstant(sbe::value::TypeTags::Null, 0), + inputExpr.getExpr(_context->state.slotVarMap)); + } + + resultExpr = + makeBinaryOp(sbe::EPrimBinary::logicOr, + makeIsMember(inputExpr.extractExpr(_context->state.slotVarMap), + std::move(equalitiesExpr), + _context->state.data->env), + std::move(resultExpr)); } return resultExpr; }; - generatePredicateExpr( - _context, *expr->fieldRef(), makePredicateExpr, traversalMode, true, hasNull); + generatePredicate(_context, *expr->fieldRef(), makePredicate, traversalMode, hasNull); } // The following are no-ops. The internal expr comparison match expression are produced // internally by rewriting an $expr expression to an AND($expr, $_internalExpr[OP]), which can @@ -1606,15 +1009,13 @@ public: // The mod function returns the result of the mod operation between the operand and // given divisor, so construct an expression to then compare the result of the operation // to the given remainder. - auto makePredicateExpr = - [context = _context, - expr](const sbe::EVariable& var) -> std::unique_ptr<sbe::EExpression> { - return generateModExpr(context->state, expr, var) + auto makePredicate = [context = _context, expr](EvalExpr inputExpr) { + return generateModExpr(context->state, expr, std::move(inputExpr)) .extractExpr(context->state.slotVarMap); }; - generatePredicateExpr( - _context, *expr->fieldRef(), makePredicateExpr, LeafTraversalMode::kArrayElementsOnly); + const auto traversalMode = LeafTraversalMode::kArrayElementsOnly; + generatePredicate(_context, *expr->fieldRef(), makePredicate, traversalMode); } void visit(const NorMatchExpression* expr) final { @@ -1626,20 +1027,18 @@ public: // Here we discard the index value of the state even if it was set by expressions below NOR. // This matches the behaviour of classic engine, which does not pass 'MatchDetails' object // to children of NOR and thus does not get any information on 'elemMatchKey' from them. - auto& frame = _context->evalStack.topFrame(); - frame.pushExpr(_context->stateHelper.makeState(makeNot(_context->stateHelper.getBool( - frame.popExpr().extractExpr(_context->state.slotVarMap))))); + auto& frame = _context->topFrame(); + frame.pushExpr(makeNot(frame.popExpr(_context->state.slotVarMap))); } void visit(const NotMatchExpression* expr) final { - auto& frame = _context->evalStack.topFrame(); + auto& frame = _context->topFrame(); // Negate the result of $not's child. // Here we discard the index value of the state even if it was set by expressions below NOT. // This matches the behaviour of classic engine, which does not pass 'MatchDetails' object // to children of NOT and thus does not get any information on 'elemMatchKey' from them. - frame.pushExpr(_context->stateHelper.makeState(makeNot(_context->stateHelper.getBool( - frame.popExpr().extractExpr(_context->state.slotVarMap))))); + frame.pushExpr(makeNot(frame.popExpr(_context->state.slotVarMap))); } void visit(const OrMatchExpression* expr) final { @@ -1647,15 +1046,13 @@ public: } void visit(const RegexMatchExpression* expr) final { - auto makePredicateExpr = - [context = _context, - expr](const sbe::EVariable& var) -> std::unique_ptr<sbe::EExpression> { - return generateRegexExpr(context->state, expr, var) + auto makePredicate = [context = _context, expr](EvalExpr inputExpr) { + return generateRegexExpr(context->state, expr, std::move(inputExpr)) .extractExpr(context->state.slotVarMap); }; - generatePredicateExpr( - _context, *expr->fieldRef(), makePredicateExpr, LeafTraversalMode::kArrayElementsOnly); + const auto traversalMode = LeafTraversalMode::kArrayElementsOnly; + generatePredicate(_context, *expr->fieldRef(), makePredicate, traversalMode); } void visit(const SizeMatchExpression* expr) final { @@ -1672,17 +1069,15 @@ public: // if the type set contains 'BSONType::Array'. if (auto typeMaskParam = expr->getInputParamId()) { auto typeMaskSlotId = _context->state.registerInputParamSlot(*typeMaskParam); - auto makePredicateExpr = - [typeMaskSlotId](const sbe::EVariable& var) -> std::unique_ptr<sbe::EExpression> { + auto makePredicate = [this, typeMaskSlotId](EvalExpr inputExpr) { return makeFillEmptyFalse( - makeFunction("typeMatch", var.clone(), makeVariable(typeMaskSlotId))); + makeFunction("typeMatch", + inputExpr.extractExpr(_context->state.slotVarMap), + makeVariable(typeMaskSlotId))); }; - generatePredicateExpr(_context, - *expr->fieldRef(), - makePredicateExpr, - LeafTraversalMode::kArrayElementsOnly); - + const auto traversalMode = LeafTraversalMode::kArrayElementsOnly; + generatePredicate(_context, *expr->fieldRef(), makePredicate, traversalMode); return; } @@ -1690,37 +1085,22 @@ public: ? LeafTraversalMode::kDoNotTraverseLeaf : LeafTraversalMode::kArrayElementsOnly; - auto makePredicateExpr = - [expr, traversalMode, context = _context]( - const sbe::EVariable& var) -> std::unique_ptr<sbe::EExpression> { + auto makePredicate = [expr, traversalMode, context = _context](EvalExpr inputExpr) { const MatcherTypeSet& ts = expr->typeSet(); - auto resultExpr = makeFillEmptyFalse( + return makeFillEmptyFalse( makeFunction("typeMatch", - var.clone(), + inputExpr.extractExpr(context->state.slotVarMap), makeConstant(sbe::value::TypeTags::NumberInt64, sbe::value::bitcastFrom<int64_t>(ts.getBSONTypeMask())))); - - // $type is always applied to the leaf of the field path. For kDoNotTraverseLeaf mode, - // generatePredicateExpr() does not convert the predicate value to state when generating - // traversal for leaf nodes of field path. For this reason, we need to perform this - // conversion manually. - if (!expr->fieldRef()->empty() && context->evalStack.topFrame().data().inputSlot && - traversalMode == LeafTraversalMode::kDoNotTraverseLeaf) { - resultExpr = context->stateHelper.makeState(std::move(resultExpr)); - } - - return resultExpr; }; - generatePredicateExpr(_context, *expr->fieldRef(), makePredicateExpr, traversalMode); + generatePredicate(_context, *expr->fieldRef(), makePredicate, traversalMode); } void visit(const WhereMatchExpression* expr) final { - auto& frame = _context->evalStack.topFrame(); - auto resultExpr = - generateWhereExpr(_context->state, expr, sbe::EVariable{*frame.data().inputSlot}); - frame.pushExpr( - _context->stateHelper.makeState(resultExpr.extractExpr(_context->state.slotVarMap))); + auto& frame = _context->topFrame(); + auto resultExpr = generateWhereExpr(_context->state, expr, frame.inputExpr.clone()); + frame.pushExpr(resultExpr.extractExpr(_context->state.slotVarMap)); } void visit(const WhereNoOpMatchExpression* expr) final {} @@ -1739,47 +1119,13 @@ public: void visit(const AlwaysFalseMatchExpression* expr) final {} void visit(const AlwaysTrueMatchExpression* expr) final {} - - void visit(const AndMatchExpression* expr) final { - if (expr == _context->topLevelAnd) { - // For a top-level $and, we evaluate each child within the current EvalFrame. - auto& frame = _context->evalStack.topFrame(); - invariant(frame.exprsCount() > 0); - frame.setStage( - makeFilter<false>(frame.extractStage(), - _context->stateHelper.getBool( - frame.popExpr().extractExpr(_context->state.slotVarMap)), - _context->planNodeId)); - return; - } - - // For non-top-level $and's, we evaluate each child in its own EvalFrame, and we - // leave these EvalFrames on the stack until we're done evaluating all the children. - pushFrameForLogicalExpressionChild(_context, expr->numChildren()); - } - + void visit(const AndMatchExpression* expr) final {} void visit(const BitsAllClearMatchExpression* expr) final {} void visit(const BitsAllSetMatchExpression* expr) final {} void visit(const BitsAnyClearMatchExpression* expr) final {} void visit(const BitsAnySetMatchExpression* expr) final {} - - void visit(const ElemMatchObjectMatchExpression* matchExpr) final { - // ElemMatchObjectMatchExpression is guaranteed to always have exactly 1 child, so we don't - // need to do anything here. - } - - void visit(const ElemMatchValueMatchExpression* matchExpr) final { - const auto& frame = _context->evalStack.topFrame(); - - // We leave each child's EvalFrame on the stack until we're finished evaluating all of - // the children. Set up a new EvalFrame for the next child with a null tree and with the - // 'inputSlot' field set to 'childInputSlot'. childInputSlot is a "correlated slot" that - // will be set up later (handled in the post-visitor). - auto inputSlot = frame.data().inputSlot; - bool childOfElemMatchValue = true; - _context->evalStack.emplaceFrame(EvalStage{}, inputSlot, childOfElemMatchValue); - } - + void visit(const ElemMatchObjectMatchExpression* matchExpr) final {} + void visit(const ElemMatchValueMatchExpression* matchExpr) final {} void visit(const EqualityMatchExpression* expr) final {} void visit(const ExistsMatchExpression* expr) final {} void visit(const ExprMatchExpression* expr) final {} @@ -1817,21 +1163,9 @@ public: void visit(const LTEMatchExpression* expr) final {} void visit(const LTMatchExpression* expr) final {} void visit(const ModMatchExpression* expr) final {} - - void visit(const NorMatchExpression* expr) final { - // We leave the EvalFrame of each child on the stack until we're done evaluating all the - // children. - pushFrameForLogicalExpressionChild(_context, expr->numChildren()); - } - + void visit(const NorMatchExpression* expr) final {} void visit(const NotMatchExpression* expr) final {} - - void visit(const OrMatchExpression* expr) final { - // We leave the EvalFrame of each child on the stack until we're done evaluating all the - // children. - pushFrameForLogicalExpressionChild(_context, expr->numChildren()); - } - + void visit(const OrMatchExpression* expr) final {} void visit(const RegexMatchExpression* expr) final {} void visit(const SizeMatchExpression* expr) final {} void visit(const TextMatchExpression* expr) final {} @@ -1845,24 +1179,19 @@ private: MatchExpressionVisitorContext* _context; }; -EvalStage applyClassicMatcher(const MatchExpression* root, - EvalStage stage, - sbe::value::SlotId inputSlot, - PlanNodeId planNodeId) { - auto expr = makeFunction("applyClassicMatcher", - makeConstant(sbe::value::TypeTags::classicMatchExpresion, - sbe::value::bitcastFrom<const MatchExpression*>( - root->shallowClone().release())), - makeVariable(inputSlot)); - - return makeFilter<false>(std::move(stage), std::move(expr), planNodeId); +EvalExpr applyClassicMatcher(const MatchExpression* root, + EvalExpr inputExpr, + optimizer::SlotVarMap& slotVarMap) { + return makeFunction("applyClassicMatcher", + makeConstant(sbe::value::TypeTags::classicMatchExpresion, + sbe::value::bitcastFrom<const MatchExpression*>( + root->shallowClone().release())), + inputExpr.extractExpr(slotVarMap)); } -EvalStage applyClassicMatcherOverIndexScan(const MatchExpression* root, - EvalStage stage, - const PlanStageSlots* slots, - const std::vector<std::string>& keyFields, - PlanNodeId planNodeId) { +EvalExpr applyClassicMatcherOverIndexScan(const MatchExpression* root, + const PlanStageSlots* slots, + const std::vector<std::string>& keyFields) { BSONObjBuilder keyPatternBuilder; auto keySlots = sbe::makeSV(); for (const auto& field : keyFields) { @@ -1874,35 +1203,24 @@ EvalStage applyClassicMatcherOverIndexScan(const MatchExpression* root, auto keyPatternTree = buildKeyPatternTree(keyPatternBuilder.obj(), keySlots); auto mkObjExpr = buildNewObjExpr(keyPatternTree.get()); - auto expr = makeFunction("applyClassicMatcher", - makeConstant(sbe::value::TypeTags::classicMatchExpresion, - sbe::value::bitcastFrom<const MatchExpression*>( - root->shallowClone().release())), - std::move(mkObjExpr)); - - return makeFilter<false>(std::move(stage), std::move(expr), planNodeId); + return makeFunction("applyClassicMatcher", + makeConstant(sbe::value::TypeTags::classicMatchExpresion, + sbe::value::bitcastFrom<const MatchExpression*>( + root->shallowClone().release())), + std::move(mkObjExpr)); } } // namespace -std::pair<boost::optional<sbe::value::SlotId>, EvalStage> generateFilter( - StageBuilderState& state, - const MatchExpression* root, - EvalStage stage, - boost::optional<sbe::value::SlotId> inputSlot, - const PlanStageSlots* slots, - PlanNodeId nodeId, - const std::vector<std::string>& keyFields, - bool isFilterOverIxscan, - bool trackIndex) { - // We don't support tracking the index when 'isFilterOverIxscan' is true. - tassert(7097206, - "The 'trackIndex' option is not support for filters over index scans", - !isFilterOverIxscan || !trackIndex); - +EvalExpr generateFilter(StageBuilderState& state, + const MatchExpression* root, + EvalExpr inputExpr, + const PlanStageSlots* slots, + const std::vector<std::string>& keyFields, + bool isFilterOverIxscan) { // The planner adds an $and expression without the operands if the query was empty. We can bail // out early without generating the filter plan stage if this is the case. if (root->matchType() == MatchExpression::AND && root->numChildren() == 0) { - return {boost::none, std::move(stage)}; + return EvalExpr{}; } // We only use the classic matcher path (aka "franken matcher") when SBE is not fully enabled. @@ -1911,20 +1229,17 @@ std::pair<boost::optional<sbe::value::SlotId>, EvalStage> generateFilter( // This is because when embedding the classic matcher all of the constants used in the filter // are in the MatchExpression itself rather than in slots. if (!feature_flags::gFeatureFlagSbeFull.isEnabledAndIgnoreFCV()) { - tassert(6681403, "trackIndex=true not supported for classic matcher in SBE", !trackIndex); tassert(7097207, - "Expected input slot or key slots to be defined", - inputSlot.has_value() || isFilterOverIxscan); + "Expected input expr to be defined", + !inputExpr.isNull() || isFilterOverIxscan); - auto outputStage = isFilterOverIxscan - ? applyClassicMatcherOverIndexScan(root, std::move(stage), slots, keyFields, nodeId) - : applyClassicMatcher(root, std::move(stage), *inputSlot, nodeId); - return {boost::none, std::move(outputStage)}; + return isFilterOverIxscan + ? applyClassicMatcherOverIndexScan(root, slots, keyFields) + : applyClassicMatcher(root, std::move(inputExpr), state.slotVarMap); } - auto stateHelper = makeFilterStateHelper(trackIndex); MatchExpressionVisitorContext context{ - state, std::move(stage), inputSlot, root, nodeId, slots, isFilterOverIxscan, *stateHelper}; + state, std::move(inputExpr), root, slots, isFilterOverIxscan}; MatchExpressionPreVisitor preVisitor{&context}; MatchExpressionInVisitor inVisitor{&context}; @@ -1999,7 +1314,7 @@ std::pair<sbe::value::TypeTags, sbe::value::Value> convertBitTestBitPositions( EvalExpr generateComparisonExpr(StageBuilderState& state, const ComparisonMatchExpression* expr, sbe::EPrimBinary::Op binaryOp, - const sbe::EVariable& var) { + EvalExpr inputExpr) { const auto& rhs = expr->getData(); auto [tagView, valView] = sbe::bson::convertFrom<true>( rhs.rawdata(), rhs.rawdata() + rhs.size(), rhs.fieldNameSize() - 1); @@ -2014,13 +1329,15 @@ EvalExpr generateComparisonExpr(StageBuilderState& state, case sbe::EPrimBinary::neq: break; case sbe::EPrimBinary::greater: - return makeFillEmptyFalse(makeNot(makeFunction("isMinKey", var.clone()))); + return makeFillEmptyFalse( + makeNot(makeFunction("isMinKey", inputExpr.extractExpr(state.slotVarMap)))); case sbe::EPrimBinary::greaterEq: - return makeFunction("exists", var.clone()); + return makeFunction("exists", inputExpr.extractExpr(state.slotVarMap)); case sbe::EPrimBinary::less: return makeConstant(sbe::value::TypeTags::Boolean, false); case sbe::EPrimBinary::lessEq: - return makeFillEmptyFalse(makeFunction("isMinKey", var.clone())); + return makeFillEmptyFalse( + makeFunction("isMinKey", inputExpr.extractExpr(state.slotVarMap))); default: break; } @@ -2032,22 +1349,25 @@ EvalExpr generateComparisonExpr(StageBuilderState& state, case sbe::EPrimBinary::greater: return makeConstant(sbe::value::TypeTags::Boolean, false); case sbe::EPrimBinary::greaterEq: - return makeFillEmptyFalse(makeFunction("isMaxKey", var.clone())); + return makeFillEmptyFalse( + makeFunction("isMaxKey", inputExpr.extractExpr(state.slotVarMap))); case sbe::EPrimBinary::less: - return makeFillEmptyFalse(makeNot(makeFunction("isMaxKey", var.clone()))); + return makeFillEmptyFalse( + makeNot(makeFunction("isMaxKey", inputExpr.extractExpr(state.slotVarMap)))); case sbe::EPrimBinary::lessEq: - return makeFunction("exists", var.clone()); + return makeFunction("exists", inputExpr.extractExpr(state.slotVarMap)); default: break; } } else if (tagView == sbe::value::TypeTags::Null) { // When comparing to null we have to consider missing and undefined. - auto inputExpr = buildMultiBranchConditional( - CaseValuePair{generateNullOrMissing(var), makeConstant(sbe::value::TypeTags::Null, 0)}, - var.clone()); + inputExpr = buildMultiBranchConditional( + CaseValuePair{generateNullOrMissing(inputExpr.clone(), state.slotVarMap), + makeConstant(sbe::value::TypeTags::Null, 0)}, + inputExpr.getExpr(state.slotVarMap)); return makeFillEmptyFalse(makeBinaryOp(binaryOp, - std::move(inputExpr), + inputExpr.extractExpr(state.slotVarMap), makeConstant(sbe::value::TypeTags::Null, 0), state.data->env)); } else if (sbe::value::isNaN(tagView, valView)) { @@ -2057,7 +1377,8 @@ EvalExpr generateComparisonExpr(StageBuilderState& state, case sbe::EPrimBinary::greaterEq: case sbe::EPrimBinary::lessEq: // If 'rhs' is NaN, then return whether the lhs is NaN. - return makeFillEmptyFalse(makeFunction("isNaN", var.clone())); + return makeFillEmptyFalse( + makeFunction("isNaN", inputExpr.extractExpr(state.slotVarMap))); case sbe::EPrimBinary::less: case sbe::EPrimBinary::greater: // Always return false for non-equality operators. @@ -2079,26 +1400,27 @@ EvalExpr generateComparisonExpr(StageBuilderState& state, return makeConstant(tag, val); }(tagView, valView); - return makeFillEmptyFalse( - makeBinaryOp(binaryOp, var.clone(), std::move(valExpr), state.data->env)); + return makeFillEmptyFalse(makeBinaryOp( + binaryOp, inputExpr.extractExpr(state.slotVarMap), std::move(valExpr), state.data->env)); } EvalExpr generateInExpr(StageBuilderState& state, const InMatchExpression* expr, - const sbe::EVariable& var) { + EvalExpr inputExpr) { tassert(6988283, "'generateInExpr' supports only parameterized queries or the ones without regexes.", static_cast<bool>(expr->getInputParamId()) || !expr->hasRegex()); auto [equalities, hasArray, hasObject, hasNull] = _generateInExprInternal(state, expr); - return makeIsMember(var.clone(), std::move(equalities), state.data->env); + return makeIsMember( + inputExpr.extractExpr(state.slotVarMap), std::move(equalities), state.data->env); } EvalExpr generateBitTestExpr(StageBuilderState& state, const BitTestMatchExpression* expr, const sbe::BitTestBehavior& bitOp, - const sbe::EVariable& var) { + EvalExpr inputExpr) { // If there's an "inputParamId" in this expr meaning this expr got parameterized, we can // register a SlotId for it and use the slot directly. std::unique_ptr<sbe::EExpression> bitPosExpr = [&]() -> std::unique_ptr<sbe::EExpression> { @@ -2117,7 +1439,7 @@ EvalExpr generateBitTestExpr(StageBuilderState& state, auto binaryBitTestExpr = makeFunction("bitTestPosition"_sd, std::move(bitPosExpr), - var.clone(), + inputExpr.getExpr(state.slotVarMap), makeConstant(sbe::value::TypeTags::NumberInt32, static_cast<int32_t>(bitOp))); // Build An EExpression for the numeric bitmask case. The AllSet case tests if (mask & @@ -2139,12 +1461,12 @@ EvalExpr generateBitTestExpr(StageBuilderState& state, // consistent with MongoDB's documentation. auto numericBitTestInputExpr = sbe::makeE<sbe::EIf>( makeFunction("typeMatch", - var.clone(), + inputExpr.getExpr(state.slotVarMap), makeConstant(sbe::value::TypeTags::NumberInt64, sbe::value::bitcastFrom<int64_t>( getBSONTypeMask(sbe::value::TypeTags::NumberDecimal)))), - makeFunction("round"_sd, var.clone()), - var.clone()); + makeFunction("round"_sd, inputExpr.getExpr(state.slotVarMap)), + inputExpr.getExpr(state.slotVarMap)); std::unique_ptr<sbe::EExpression> bitMaskExpr = [&]() -> std::unique_ptr<sbe::EExpression> { if (auto bitMaskParamId = expr->getBitMaskParamId()) { @@ -2169,19 +1491,21 @@ EvalExpr generateBitTestExpr(StageBuilderState& state, } // numericBitTestExpr might produce Nothing, so we wrap it with makeFillEmptyFalse(). - return sbe::makeE<sbe::EIf>(makeFunction("isBinData"_sd, var.clone()), - std::move(binaryBitTestExpr), - makeFillEmptyFalse(std::move(numericBitTestExpr))); + return sbe::makeE<sbe::EIf>( + makeFunction("isBinData"_sd, inputExpr.extractExpr(state.slotVarMap)), + std::move(binaryBitTestExpr), + makeFillEmptyFalse(std::move(numericBitTestExpr))); } EvalExpr generateModExpr(StageBuilderState& state, const ModMatchExpression* expr, - const sbe::EVariable& var) { + EvalExpr inputExpr) { auto frameId = state.frameId(); - const sbe::EVariable& dividend = var; + auto& dividend = inputExpr; sbe::EVariable dividendConvertedToNumberInt64{frameId, 0}; auto truncatedArgument = sbe::makeE<sbe::ENumericConvert>( - makeFunction("trunc"_sd, dividend.clone()), sbe::value::TypeTags::NumberInt64); + makeFunction("trunc"_sd, dividend.getExpr(state.slotVarMap)), + sbe::value::TypeTags::NumberInt64); tassert(6142202, "Either both divisor and remainer are parameterized or none", (expr->getDivisorInputParamId() && expr->getRemainderInputParamId()) || @@ -2214,20 +1538,21 @@ EvalExpr generateModExpr(StageBuilderState& state, sbe::EPrimBinary::eq, makeFunction("mod"_sd, dividendConvertedToNumberInt64.clone(), std::move(divisorExpr)), std::move(remainderExpr)))); - return makeBinaryOp(sbe::EPrimBinary::logicAnd, - makeNot(makeBinaryOp(sbe::EPrimBinary::logicOr, - generateNonNumericCheck(dividend), - makeBinaryOp(sbe::EPrimBinary::logicOr, - generateNaNCheck(dividend), - generateInfinityCheck(dividend)))), - sbe::makeE<sbe::ELocalBind>(frameId, - sbe::makeEs(std::move(truncatedArgument)), - std::move(modExpression))); + return makeBinaryOp( + sbe::EPrimBinary::logicAnd, + makeNot( + makeBinaryOp(sbe::EPrimBinary::logicOr, + generateNonNumericCheck(dividend.clone(), state.slotVarMap), + makeBinaryOp(sbe::EPrimBinary::logicOr, + generateNaNCheck(dividend.clone(), state.slotVarMap), + generateInfinityCheck(dividend.clone(), state.slotVarMap)))), + sbe::makeE<sbe::ELocalBind>( + frameId, sbe::makeEs(std::move(truncatedArgument)), std::move(modExpression))); } EvalExpr generateRegexExpr(StageBuilderState& state, const RegexMatchExpression* expr, - const sbe::EVariable& var) { + EvalExpr inputExpr) { tassert(6142203, "Either both sourceRegex and compiledRegex are parameterized or none", (expr->getSourceRegexInputParamId() && expr->getCompiledRegexInputParamId()) || @@ -2257,16 +1582,17 @@ EvalExpr generateRegexExpr(StageBuilderState& state, auto resultExpr = makeBinaryOp( sbe::EPrimBinary::logicOr, - makeFillEmptyFalse( - makeBinaryOp(sbe::EPrimBinary::eq, var.clone(), std::move(bsonRegexExpr))), - makeFillEmptyFalse(makeFunction("regexMatch", std::move(compiledRegexExpr), var.clone()))); + makeFillEmptyFalse(makeBinaryOp( + sbe::EPrimBinary::eq, inputExpr.getExpr(state.slotVarMap), std::move(bsonRegexExpr))), + makeFillEmptyFalse(makeFunction( + "regexMatch", std::move(compiledRegexExpr), inputExpr.getExpr(state.slotVarMap)))); return std::move(resultExpr); } EvalExpr generateWhereExpr(StageBuilderState& state, const WhereMatchExpression* expr, - const sbe::EVariable& var) { + EvalExpr inputExpr) { // Generally speaking, this visitor is non-destructive and does not mutate the MatchExpression // tree. However, in order to apply an optimization to avoid making a copy of the 'JsFunction' // object stored within 'WhereMatchExpression', we can transfer its ownership from the match @@ -2279,15 +1605,16 @@ EvalExpr generateWhereExpr(StageBuilderState& state, sbe::value::bitcastFrom<JsFunction*>( const_cast<WhereMatchExpression*>(expr)->extractPredicate().release())); - std::unique_ptr<sbe::EExpression> whereExpr; // If there's an "inputParamId" in this expr meaning this expr got parameterized, we can // register a SlotId for it and use the slot directly. if (auto inputParam = expr->getInputParamId()) { auto inputParamSlotId = state.registerInputParamSlot(*inputParam); - whereExpr = makeFunction("runJsPredicate", makeVariable(inputParamSlotId), var.clone()); + return makeFunction("runJsPredicate", + makeVariable(inputParamSlotId), + inputExpr.extractExpr(state.slotVarMap)); } else { - whereExpr = makeFunction("runJsPredicate", std::move(predicate), var.clone()); + return makeFunction( + "runJsPredicate", std::move(predicate), inputExpr.extractExpr(state.slotVarMap)); } - return std::move(whereExpr); } } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder_filter.h b/src/mongo/db/query/sbe_stage_builder_filter.h index 515635f9f56..4e0376f3a74 100644 --- a/src/mongo/db/query/sbe_stage_builder_filter.h +++ b/src/mongo/db/query/sbe_stage_builder_filter.h @@ -41,9 +41,8 @@ namespace mongo::stage_builder { class PlanStageSlots; /** - * This function generates an SBE plan stage tree implementing a filter expression represented by - * 'root'. The 'stage' parameter provides the input subtree to build on top of. The 'inputSlot' - * and 'slots' parameters specify the input(s) that the filter should use. + * This function generates an EvalExpr that implements the filter expression represented by 'root'. + * The 'inputExpr' and 'slots' parameters specify the input(s) that the filter should use. * * The 'isFilterOverIxscan' parameter controls if we should search for kField slots in 'slots' that * correspond to the full paths needed by the filter. Typically 'isFilterOverIxscan' is false unless @@ -53,25 +52,15 @@ class PlanStageSlots; * key field names ('keyFields') that lists the names of all the kField slots that are needed by * 'root'. * - * This function returns a pair containing an optional<SlotId> and an EvalStage. If 'trackIndex' is - * false then the optional<SlotId> will always be boost::none. If 'trackIndex' is true, then this - * optional<SlotId> will be set to a slot that holds information about the index of the matching - * element if an array traversal was performed. (If multiple array traversals were performed, it is - * undefined which traversal will report the index of the matching array element.) - * - * Note that this function does not allow both 'isFilterOverIxscan' and 'trackIndex' to be true. If - * they are both true, then this function will throw an exception. + * This function returns an EvalExpr. If 'root' is an AND with no children, this function will + * return a null EvalExpr to indicate that there is no filter condition. */ -std::pair<boost::optional<sbe::value::SlotId>, EvalStage> generateFilter( - StageBuilderState& state, - const MatchExpression* root, - EvalStage stage, - boost::optional<sbe::value::SlotId> inputSlot, - const PlanStageSlots* slots, - PlanNodeId planNodeId, - const std::vector<std::string>& keyFields = {}, - bool isFilterOverIxscan = false, - bool trackIndex = false); +EvalExpr generateFilter(StageBuilderState& state, + const MatchExpression* root, + EvalExpr inputExpr, + const PlanStageSlots* slots, + const std::vector<std::string>& keyFields = {}, + bool isFilterOverIxscan = false); /** * Converts the list of equalities inside the given $in expression ('expr') into an SBE array, which @@ -101,21 +90,21 @@ std::pair<sbe::value::TypeTags, sbe::value::Value> convertBitTestBitPositions( EvalExpr generateComparisonExpr(StageBuilderState& state, const ComparisonMatchExpression* expr, sbe::EPrimBinary::Op binaryOp, - const sbe::EVariable& var); + EvalExpr inputExpr); EvalExpr generateInExpr(StageBuilderState& state, const InMatchExpression* expr, - const sbe::EVariable& var); + EvalExpr inputExpr); EvalExpr generateBitTestExpr(StageBuilderState& state, const BitTestMatchExpression* expr, const sbe::BitTestBehavior& bitOp, - const sbe::EVariable& var); + EvalExpr inputExpr); EvalExpr generateModExpr(StageBuilderState& state, const ModMatchExpression* expr, - const sbe::EVariable& var); + EvalExpr inputExpr); EvalExpr generateRegexExpr(StageBuilderState& state, const RegexMatchExpression* expr, - const sbe::EVariable& var); + EvalExpr inputExpr); EvalExpr generateWhereExpr(StageBuilderState& state, const WhereMatchExpression* expr, - const sbe::EVariable& var); + EvalExpr inputExpr); } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder_helpers.cpp b/src/mongo/db/query/sbe_stage_builder_helpers.cpp index 68652223cf1..d3bf8d32ad2 100644 --- a/src/mongo/db/query/sbe_stage_builder_helpers.cpp +++ b/src/mongo/db/query/sbe_stage_builder_helpers.cpp @@ -142,10 +142,21 @@ std::unique_ptr<sbe::EExpression> generateNullOrMissing(std::unique_ptr<sbe::EEx return generateNullOrMissingExpr(*arg); } +std::unique_ptr<sbe::EExpression> generateNullOrMissing(EvalExpr arg, + optimizer::SlotVarMap& slotVarMap) { + auto expr = arg.extractExpr(slotVarMap); + return generateNullOrMissingExpr(*expr); +} + std::unique_ptr<sbe::EExpression> generateNonNumericCheck(const sbe::EVariable& var) { return makeNot(makeFunction("isNumber", var.clone())); } +std::unique_ptr<sbe::EExpression> generateNonNumericCheck(EvalExpr expr, + optimizer::SlotVarMap& slotVarMap) { + return makeNot(makeFunction("isNumber", expr.extractExpr(slotVarMap))); +} + std::unique_ptr<sbe::EExpression> generateLongLongMinCheck(const sbe::EVariable& var) { return makeBinaryOp( sbe::EPrimBinary::logicAnd, @@ -165,10 +176,20 @@ std::unique_ptr<sbe::EExpression> generateNaNCheck(const sbe::EVariable& var) { return makeFunction("isNaN", var.clone()); } +std::unique_ptr<sbe::EExpression> generateNaNCheck(EvalExpr expr, + optimizer::SlotVarMap& slotVarMap) { + return makeFunction("isNaN", expr.extractExpr(slotVarMap)); +} + std::unique_ptr<sbe::EExpression> generateInfinityCheck(const sbe::EVariable& var) { return makeFunction("isInfinity"_sd, var.clone()); } +std::unique_ptr<sbe::EExpression> generateInfinityCheck(EvalExpr expr, + optimizer::SlotVarMap& slotVarMap) { + return makeFunction("isInfinity"_sd, expr.extractExpr(slotVarMap)); +} + std::unique_ptr<sbe::EExpression> generateNonPositiveCheck(const sbe::EVariable& var) { return makeBinaryOp(sbe::EPrimBinary::EPrimBinary::lessEq, var.clone(), @@ -537,7 +558,7 @@ std::unique_ptr<sbe::EExpression> makeIfNullExpr( return expr; } -EvalExprStagePair generateUnion(std::vector<EvalExprStagePair> branches, +EvalExprStagePair generateUnion(std::vector<std::pair<EvalExpr, EvalStage>> branches, BranchFn branchFn, PlanNodeId planNodeId, sbe::value::SlotIdGenerator* slotIdGenerator, @@ -572,22 +593,11 @@ EvalExprStagePair generateUnion(std::vector<EvalExprStagePair> branches, return {outputSlot, std::move(outputStage)}; } -EvalExprStagePair generateSingleResultUnion(std::vector<EvalExprStagePair> branches, - BranchFn branchFn, - PlanNodeId planNodeId, - sbe::value::SlotIdGenerator* slotIdGenerator, - optimizer::SlotVarMap& slotVarMap) { - auto [unionEvalExpr, unionEvalStage] = generateUnion( - std::move(branches), std::move(branchFn), planNodeId, slotIdGenerator, slotVarMap); - return {std::move(unionEvalExpr), - EvalStage{makeLimitTree(unionEvalStage.extractStage(planNodeId), planNodeId), - unionEvalStage.extractOutSlots()}}; -} - -optimizer::ABT makeBalancedBooleanOpTreeImpl(optimizer::Operations logicOp, - std::vector<optimizer::ABT>& leaves, - size_t from, - size_t until) { +std::unique_ptr<sbe::EExpression> makeBalancedBooleanOpTreeImpl( + sbe::EPrimBinary::Op logicOp, + std::vector<std::unique_ptr<sbe::EExpression>>& leaves, + size_t from, + size_t until) { invariant(from < until); if (from + 1 == until) { return std::move(leaves[from]); @@ -595,20 +605,19 @@ optimizer::ABT makeBalancedBooleanOpTreeImpl(optimizer::Operations logicOp, size_t mid = (from + until) / 2; auto lhs = makeBalancedBooleanOpTreeImpl(logicOp, leaves, from, mid); auto rhs = makeBalancedBooleanOpTreeImpl(logicOp, leaves, mid, until); - return optimizer::make<optimizer::BinaryOp>(logicOp, std::move(lhs), std::move(rhs)); + return makeBinaryOp(logicOp, std::move(lhs), std::move(rhs)); } } -optimizer::ABT makeBalancedBooleanOpTree(optimizer::Operations logicOp, - std::vector<optimizer::ABT> leaves) { +std::unique_ptr<sbe::EExpression> makeBalancedBooleanOpTree( + sbe::EPrimBinary::Op logicOp, std::vector<std::unique_ptr<sbe::EExpression>> leaves) { return makeBalancedBooleanOpTreeImpl(logicOp, leaves, 0, leaves.size()); } -std::unique_ptr<sbe::EExpression> makeBalancedBooleanOpTreeImpl( - sbe::EPrimBinary::Op logicOp, - std::vector<std::unique_ptr<sbe::EExpression>>& leaves, - size_t from, - size_t until) { +optimizer::ABT makeBalancedBooleanOpTreeImpl(optimizer::Operations logicOp, + std::vector<optimizer::ABT>& leaves, + size_t from, + size_t until) { invariant(from < until); if (from + 1 == until) { return std::move(leaves[from]); @@ -616,109 +625,37 @@ std::unique_ptr<sbe::EExpression> makeBalancedBooleanOpTreeImpl( size_t mid = (from + until) / 2; auto lhs = makeBalancedBooleanOpTreeImpl(logicOp, leaves, from, mid); auto rhs = makeBalancedBooleanOpTreeImpl(logicOp, leaves, mid, until); - return makeBinaryOp(logicOp, std::move(lhs), std::move(rhs)); + return optimizer::make<optimizer::BinaryOp>(logicOp, std::move(lhs), std::move(rhs)); } } -std::unique_ptr<sbe::EExpression> makeBalancedBooleanOpTree( - sbe::EPrimBinary::Op logicOp, std::vector<std::unique_ptr<sbe::EExpression>> leaves) { +optimizer::ABT makeBalancedBooleanOpTree(optimizer::Operations logicOp, + std::vector<optimizer::ABT> leaves) { return makeBalancedBooleanOpTreeImpl(logicOp, leaves, 0, leaves.size()); } -EvalExprStagePair generateShortCircuitingLogicalOp(sbe::EPrimBinary::Op logicOp, - std::vector<EvalExprStagePair> branches, - PlanNodeId planNodeId, - sbe::value::SlotIdGenerator* slotIdGenerator, - optimizer::SlotVarMap& slotVarMap, - const FilterStateHelper& stateHelper) { - invariant(logicOp == sbe::EPrimBinary::logicAnd || logicOp == sbe::EPrimBinary::logicOr); - - if (!branches.empty() && logicOp == sbe::EPrimBinary::logicOr) { - // OR does not support index tracking, so we must ensure that state from the last branch - // holds only boolean value. - // NOTE: There is no technical reason for that. We could support index tracking for OR - // expression, but this would differ from the existing behaviour. - auto& [expr, _] = branches.back(); - if (expr.hasExpr()) { - expr = stateHelper.makeState(stateHelper.getBool(expr.extractExpr(slotVarMap))); - } else { - expr = stateHelper.makeState(stateHelper.getBool(expr.extractABT(slotVarMap))); +EvalExpr makeBalancedBooleanOpTree(sbe::EPrimBinary::Op logicOp, + std::vector<EvalExpr> leaves, + optimizer::SlotVarMap& slotVarMap) { + if (std::all_of( + leaves.begin(), leaves.end(), [](auto&& e) { return e.hasABT() || e.hasSlot(); })) { + std::vector<optimizer::ABT> abtExprs; + abtExprs.reserve(leaves.size()); + for (auto&& e : leaves) { + abtExprs.emplace_back(e.extractABT(slotVarMap)); } + return EvalExpr{makeBalancedBooleanOpTree(logicOp == sbe::EPrimBinary::logicAnd + ? optimizer::Operations::And + : optimizer::Operations::Or, + std::move(abtExprs))}; } - // For AND and OR, if 'branches' only has one element, we can just return branches[0]. - if (branches.size() == 1) { - return std::move(branches[0]); + std::vector<std::unique_ptr<sbe::EExpression>> exprs; + exprs.reserve(leaves.size()); + for (auto&& e : leaves) { + exprs.emplace_back(e.extractExpr(slotVarMap)); } - - bool exprOnlyBranches = true, allAbtEligibleBranches = true; - for (const auto& [expr, stage] : branches) { - exprOnlyBranches &= stage.isNull(); - allAbtEligibleBranches &= expr.hasABT() || expr.hasSlot(); - } - - if (exprOnlyBranches) { - if (allAbtEligibleBranches) { - std::vector<optimizer::ABT> leaves; - leaves.reserve(branches.size()); - for (auto& branch : branches) { - auto& [expr, _] = branch; - leaves.emplace_back(stateHelper.getBool(expr.extractABT(slotVarMap))); - } - // Create the balanced binary tree to keep the tree shallow and safe for recursion. - auto exprOnlyOp = makeBalancedBooleanOpTree(logicOp == sbe::EPrimBinary::logicAnd - ? optimizer::Operations::And - : optimizer::Operations::Or, - std::move(leaves)); - return {EvalExpr{std::move(exprOnlyOp)}, EvalStage{}}; - } - // Fallback to generate an SBE EExpression node. - - std::vector<std::unique_ptr<sbe::EExpression>> leaves; - leaves.reserve(branches.size()); - for (auto& branch : branches) { - auto& [expr, _] = branch; - leaves.push_back(stateHelper.getBool(expr.extractExpr(slotVarMap))); - } - // Create the balanced binary tree to keep the tree shallow and safe for recursion. - auto exprOnlyOp = makeBalancedBooleanOpTree(logicOp, std::move(leaves)); - return {EvalExpr{std::move(exprOnlyOp)}, EvalStage{}}; - } - - // Prepare to create limit-1/union with N branches (where N is the number of operands). Each - // branch will be evaluated from left to right until one of the branches produces a value. The - // first N-1 branches have a FilterStage to control whether they produce a value. If a branch's - // filter condition is true, the branch will produce a value and the remaining branches will not - // be evaluated. In other words, the evaluation process will "short-circuit". If a branch's - // filter condition is false, the branch will not produce a value and the evaluation process - // will continue. The last branch doesn't have a FilterStage and will always produce a value. - auto branchFn = [logicOp, &stateHelper](EvalExpr expr, - EvalStage stage, - PlanNodeId planNodeId, - sbe::value::SlotIdGenerator* slotIdGenerator, - optimizer::SlotVarMap& slotVarMap) { - // Create a FilterStage for each branch (except the last one). If a branch's filter - // condition is true, it will "short-circuit" the evaluation process. For AND, short- - // circuiting should happen if an operand evalautes to false. For OR, short-circuiting - // should happen if an operand evaluates to true. - // Set up an output value to be returned if short-circuiting occurs. For AND, when - // short-circuiting occurs, the output returned should be false. For OR, when short- - // circuiting occurs, the output returned should be true. - auto filterExpr = stateHelper.getBool(expr.extractExpr(slotVarMap)); - if (logicOp == sbe::EPrimBinary::logicAnd) { - filterExpr = makeNot(std::move(filterExpr)); - } - stage = makeFilter<false>(std::move(stage), std::move(filterExpr), planNodeId); - - auto resultSlot = slotIdGenerator->generate(); - auto resultValue = stateHelper.makeState(logicOp == sbe::EPrimBinary::logicOr); - stage = makeProject(std::move(stage), planNodeId, resultSlot, std::move(resultValue)); - - return std::make_pair(resultSlot, std::move(stage)); - }; - - return generateSingleResultUnion( - std::move(branches), branchFn, planNodeId, slotIdGenerator, slotVarMap); + return EvalExpr{makeBalancedBooleanOpTree(logicOp, std::move(exprs))}; } std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> generateVirtualScan( @@ -810,66 +747,6 @@ uint32_t dateTypeMask() { getBSONTypeMask(sbe::value::TypeTags::bsonObjectId)); } -EvalStage IndexStateHelper::makeTraverseCombinator( - EvalStage outer, - EvalStage inner, - sbe::value::SlotId inputSlot, - sbe::value::SlotId outputSlot, - sbe::value::SlotId innerOutputSlot, - PlanNodeId planNodeId, - sbe::value::FrameIdGenerator* frameIdGenerator) const { - // Fold expression is executed only when array has more then 1 element. It increments index - // value on each iteration. During this process index is paired with false value. Once the - // predicate evaluates to true, false value of index is changed to true. Final expression of - // traverse stage detects that now index is paired with true value and it means that we have - // found an index of array element where predicate evaluates to true. - // - // First step is to increment index. Fold expression is always executed when index stored in - // 'outputSlot' is encoded as a false value. This means that to increment index, we should - // subtract 1 from it. - auto frameId = frameIdGenerator->generate(); - auto advancedIndex = sbe::makeE<sbe::EPrimBinary>( - sbe::EPrimBinary::sub, sbe::makeE<sbe::EVariable>(outputSlot), makeConstant(ValueType, 1)); - auto binds = sbe::makeEs(std::move(advancedIndex)); - sbe::EVariable advancedIndexVar{frameId, 0}; - - // In case the predicate in the inner branch of traverse returns true, we want pair - // incremented index with true value. This will tell final expression of traverse that we - // have found a matching element and iteration can be stopped. - // The expression below express the following function: f(x) = abs(x) - 1. This function - // converts false value to a true value because f(- index - 2) = index + 1 (take a look at - // the comment for the 'IndexStateHelper' class for encoding description). - auto indexWithTrueValue = - sbe::makeE<sbe::EPrimBinary>(sbe::EPrimBinary::sub, - makeFunction("abs", advancedIndexVar.clone()), - makeConstant(ValueType, 1)); - - // Finally, we check if the predicate in the inner branch returned true. If that's the case, - // we pair incremented index with true value. Otherwise, it stays paired with false value. - auto foldExpr = sbe::makeE<sbe::EIf>(FilterStateHelper::getBool(innerOutputSlot), - std::move(indexWithTrueValue), - advancedIndexVar.clone()); - - foldExpr = sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(foldExpr)); - - return makeTraverse(std::move(outer), - std::move(inner), - inputSlot, - outputSlot, - innerOutputSlot, - std::move(foldExpr), - FilterStateHelper::getBool(outputSlot), - planNodeId, - 1); -} - -std::unique_ptr<FilterStateHelper> makeFilterStateHelper(bool trackIndex) { - if (trackIndex) { - return std::make_unique<IndexStateHelper>(); - } - return std::make_unique<BooleanStateHelper>(); -} - sbe::value::SlotId StageBuilderState::getGlobalVariableSlot(Variables::Id variableId) { if (auto it = data->variableIdToSlotMap.find(variableId); it != data->variableIdToSlotMap.end()) { diff --git a/src/mongo/db/query/sbe_stage_builder_helpers.h b/src/mongo/db/query/sbe_stage_builder_helpers.h index 1958e3988c6..09102dafa2d 100644 --- a/src/mongo/db/query/sbe_stage_builder_helpers.h +++ b/src/mongo/db/query/sbe_stage_builder_helpers.h @@ -88,12 +88,16 @@ std::unique_ptr<sbe::EExpression> generateNullOrMissing(sbe::FrameId frameId, sbe::value::SlotId slotId); std::unique_ptr<sbe::EExpression> generateNullOrMissing(std::unique_ptr<sbe::EExpression> arg); +std::unique_ptr<sbe::EExpression> generateNullOrMissing(EvalExpr arg, + optimizer::SlotVarMap& slotVarMap); /** * Generates an EExpression that checks if the input expression is a non-numeric type _assuming * that_ it has already been verified to be neither null nor missing. */ std::unique_ptr<sbe::EExpression> generateNonNumericCheck(const sbe::EVariable& var); +std::unique_ptr<sbe::EExpression> generateNonNumericCheck(EvalExpr expr, + optimizer::SlotVarMap& slotVarMap); /** * Generates an EExpression that checks if the input expression is the value NumberLong(-2^64). @@ -105,11 +109,15 @@ std::unique_ptr<sbe::EExpression> generateLongLongMinCheck(const sbe::EVariable& * already been verified to be numeric. */ std::unique_ptr<sbe::EExpression> generateNaNCheck(const sbe::EVariable& var); +std::unique_ptr<sbe::EExpression> generateNaNCheck(EvalExpr expr, + optimizer::SlotVarMap& slotVarMap); /** * Generates an EExpression that checks if the input expression is a numeric Infinity. */ std::unique_ptr<sbe::EExpression> generateInfinityCheck(const sbe::EVariable& var); +std::unique_ptr<sbe::EExpression> generateInfinityCheck(EvalExpr expr, + optimizer::SlotVarMap& slotVarMap); /** * Generates an EExpression that checks if the input expression is a non-positive number (i.e. <= 0) @@ -455,20 +463,11 @@ std::unique_ptr<sbe::EExpression> makeIfNullExpr( * Creates a union stage with specified branches. Each branch is passed to 'branchFn' first. If * 'branchFn' is not set, expression from branch is simply projected to a slot. */ -EvalExprStagePair generateUnion(std::vector<EvalExprStagePair> branches, +EvalExprStagePair generateUnion(std::vector<std::pair<EvalExpr, EvalStage>> branches, BranchFn branchFn, PlanNodeId planNodeId, sbe::value::SlotIdGenerator* slotIdGenerator, optimizer::SlotVarMap& slotVarMap); -/** - * Creates limit-1/union stage with specified branches. Each branch is passed to 'branchFn' first. - * If 'branchFn' is not set, expression from branch is simply projected to a slot. - */ -EvalExprStagePair generateSingleResultUnion(std::vector<EvalExprStagePair> branches, - BranchFn branchFn, - PlanNodeId planNodeId, - sbe::value::SlotIdGenerator* slotIdGenerator, - optimizer::SlotVarMap& slotVarMap); /** This helper takes an SBE SlotIdGenerator and an SBE Array and returns an output slot and a * unwind/project/limit/coscan subtree that streams out the elements of the array one at a time via @@ -559,315 +558,18 @@ std::unique_ptr<sbe::PlanStage> makeLoopJoinForFetch(std::unique_ptr<sbe::PlanSt sbe::value::SlotVector slotsToForward); /** - * Trees generated by 'generateFilter' maintain state during execution. There are two types of state - * that can be maintained: - * - Boolean. The state is just boolean value, indicating if the document matches the predicate. - * - Index. The state stores a tuple of (boolean, optional int32). - * - * Depending on the query type, one of state types can be selected to use in the tree. - * 'FilterStateHelper' class and it's descendants aim to provide unified interface to operate with - * this two types of states. - */ -class FilterStateHelper { -public: - using Expression = std::unique_ptr<sbe::EExpression>; - - virtual ~FilterStateHelper() = default; - - /** - * Returns true if state contains a value along with boolean and false otherwise. - */ - virtual bool stateContainsValue() const = 0; - - /** - * Creates a constant holding state with given boolean 'value'. Index part of the constructed - * state is empty. - */ - virtual Expression makeState(bool value) const = 0; - - /** - * Creates an expression that constructs state from 'expr'. 'expr' must evaluate to a boolean - * value. Index part of the constructed state is empty. - */ - virtual Expression makeState(Expression expr) const = 0; - virtual optimizer::ABT makeState(optimizer::ABT expr) const = 0; - - /** - * Creates an expression that constructs an initial state from 'expr'. 'expr' must evaluate to a - * boolean value. - * Initial state is used as an output value for the inner branch passed to - * 'makeTraverseCombinator'. - */ - virtual Expression makeInitialState(Expression expr) const = 0; - - /** - * Creates an expression that extracts boolean value from the state evaluated from 'expr'. - */ - virtual Expression getBool(Expression expr) const = 0; - virtual optimizer::ABT getBool(optimizer::ABT expr) const = 0; - - Expression getBool(sbe::value::SlotId slotId) const { - return getBool(sbe::makeE<sbe::EVariable>(slotId)); - } - - /** - * Implements Elvis operator. If state from 'left' expression represents true boolean value, - * returns 'left'. Otherwise, returns 'right'. - */ - virtual Expression mergeStates(Expression left, - Expression right, - sbe::value::FrameIdGenerator* frameIdGenerator) const = 0; - - /** - * Extracts index value from the state and projects it into a separate slot. If state does not - * contain index value, slot contains Nothing. - * If state does not support storing index value, this function does nothing. - */ - virtual std::pair<boost::optional<sbe::value::SlotId>, EvalStage> projectValueCombinator( - sbe::value::SlotId stateSlot, - EvalStage stage, - PlanNodeId planNodeId, - sbe::value::SlotIdGenerator* slotIdGenerator, - sbe::value::FrameIdGenerator* frameIdGenerator) const = 0; - - /** - * Uses an expression from 'EvalExprStagePair' to construct state. Expresion must evaluate to - * boolean value. - */ - virtual EvalExprStagePair makePredicateCombinator(EvalExprStagePair pair, - optimizer::SlotVarMap& varMap) const = 0; - - /** - * Creates traverse stage with fold and final expressions tuned to maintain consistent state. - * If state does support index value, records the index of a first array element for which inner - * branch returns true value. - */ - virtual EvalStage makeTraverseCombinator( - EvalStage outer, - EvalStage inner, - sbe::value::SlotId inputSlot, - sbe::value::SlotId outputSlot, - sbe::value::SlotId innerOutputSlot, - PlanNodeId planNodeId, - sbe::value::FrameIdGenerator* frameIdGenerator) const = 0; -}; - -/** - * This class implements 'FilterStateHelper' interface for a state which can be represented as a - * tuple of (boolean, optional int32). Such tuple is encoded as a single int64 value. - * - * While we could represent such tuple as an SBE array, this approach would cost us additional - * allocations and the need to call expensive builtins such as 'getElement'. Integer operations are - * much simpler, faster and do not require any allocations. - * - * The following encoding is implemented: - * - [False, Nothing] -> -1 - * - [True, Nothing] -> 0 - * - [False, value] -> - value - 2 - * - [True, value] -> value + 1 - * - * Such encoding allows us to easily extract boolean value (just compare resulting int64 with 0) and - * requires only a few arithmetical operations to extract the index value. Furthemore, we can - * increment/decrement index value simply by incrementing/decrementing the decoded value. - */ -class IndexStateHelper : public FilterStateHelper { -public: - static constexpr sbe::value::TypeTags ValueType = sbe::value::TypeTags::NumberInt64; - - bool stateContainsValue() const override { - return true; - } - - Expression makeState(bool value) const override { - return makeConstant(ValueType, value ? 0 : -1); - } - - Expression makeState(Expression expr) const override { - return sbe::makeE<sbe::EIf>(std::move(expr), makeState(true), makeState(false)); - } - - optimizer::ABT makeState(optimizer::ABT expr) const override { - return optimizer::make<optimizer::If>( - std::move(expr), optimizer::Constant::int64(0), optimizer::Constant::int64(-1)); - } - - Expression makeInitialState(Expression expr) const override { - return sbe::makeE<sbe::EIf>( - std::move(expr), makeConstant(ValueType, 1), makeConstant(ValueType, -2)); - } - - Expression getBool(Expression expr) const override { - return sbe::makeE<sbe::EPrimBinary>( - sbe::EPrimBinary::greaterEq, std::move(expr), makeConstant(ValueType, 0)); - } - - optimizer::ABT getBool(optimizer::ABT expr) const override { - return optimizer::make<optimizer::BinaryOp>( - optimizer::Operations::Gte, std::move(expr), optimizer::Constant::int64(0)); - } - - Expression mergeStates(Expression left, - Expression right, - sbe::value::FrameIdGenerator* frameIdGenerator) const override { - return makeLocalBind(frameIdGenerator, - [&](sbe::EVariable left) { - return sbe::makeE<sbe::EIf>( - getBool(left.clone()), left.clone(), std::move(right)); - }, - std::move(left)); - } - - std::pair<boost::optional<sbe::value::SlotId>, EvalStage> projectValueCombinator( - sbe::value::SlotId stateSlot, - EvalStage stage, - PlanNodeId planNodeId, - sbe::value::SlotIdGenerator* slotIdGenerator, - sbe::value::FrameIdGenerator* frameIdGenerator) const override { - sbe::EVariable stateVar{stateSlot}; - auto indexSlot = slotIdGenerator->generate(); - - auto indexInt64 = sbe::makeE<sbe::EPrimBinary>( - sbe::EPrimBinary::sub, stateVar.clone(), makeConstant(ValueType, 1)); - - auto indexInt32 = makeLocalBind( - frameIdGenerator, - [&](sbe::EVariable convertedIndex) { - return sbe::makeE<sbe::EIf>( - makeFunction("exists", convertedIndex.clone()), - convertedIndex.clone(), - sbe::makeE<sbe::EFail>(ErrorCodes::Error{5291403}, - "Cannot convert array index into int32 number")); - }, - sbe::makeE<sbe::ENumericConvert>(std::move(indexInt64), - sbe::value::TypeTags::NumberInt32)); - - auto resultStage = makeProject( - std::move(stage), - planNodeId, - indexSlot, - sbe::makeE<sbe::EIf>(sbe::makeE<sbe::EPrimBinary>(sbe::EPrimBinary::greater, - stateVar.clone(), - makeConstant(ValueType, 0)), - std::move(indexInt32), - makeConstant(sbe::value::TypeTags::Nothing, 0))); - return {indexSlot, std::move(resultStage)}; - } - - EvalExprStagePair makePredicateCombinator(EvalExprStagePair pair, - optimizer::SlotVarMap& varMap) const override { - auto [expr, stage] = std::move(pair); - return {makeState(expr.extractExpr(varMap)), std::move(stage)}; - } - - EvalStage makeTraverseCombinator(EvalStage outer, - EvalStage inner, - sbe::value::SlotId inputSlot, - sbe::value::SlotId outputSlot, - sbe::value::SlotId innerOutputSlot, - PlanNodeId planNodeId, - sbe::value::FrameIdGenerator* frameIdGenerator) const override; -}; - -/** - * This class implements 'FilterStateHelper' interface for a plain boolean state, without index - * part. - */ -class BooleanStateHelper : public FilterStateHelper { -public: - bool stateContainsValue() const override { - return false; - } - - Expression makeState(bool value) const override { - return makeConstant(sbe::value::TypeTags::Boolean, value); - } - - Expression makeState(Expression expr) const override { - return expr; - } - - optimizer::ABT makeState(optimizer::ABT expr) const override { - return expr; - } - - Expression makeInitialState(Expression expr) const override { - return expr; - } - - Expression getBool(Expression expr) const override { - return expr; - } - - optimizer::ABT getBool(optimizer::ABT expr) const override { - return expr; - } - - Expression mergeStates(Expression left, - Expression right, - sbe::value::FrameIdGenerator* frameIdGenerator) const override { - return sbe::makeE<sbe::EPrimBinary>( - sbe::EPrimBinary::logicOr, std::move(left), std::move(right)); - } - - std::pair<boost::optional<sbe::value::SlotId>, EvalStage> projectValueCombinator( - sbe::value::SlotId stateSlot, - EvalStage stage, - PlanNodeId planNodeId, - sbe::value::SlotIdGenerator* slotIdGenerator, - sbe::value::FrameIdGenerator* frameIdGenerator) const override { - return {stateSlot, std::move(stage)}; - } - - EvalExprStagePair makePredicateCombinator(EvalExprStagePair pair, - optimizer::SlotVarMap& varMap) const override { - return pair; - } - - EvalStage makeTraverseCombinator( - EvalStage outer, - EvalStage inner, - sbe::value::SlotId inputSlot, - sbe::value::SlotId outputSlot, - sbe::value::SlotId innerOutputSlot, - PlanNodeId planNodeId, - sbe::value::FrameIdGenerator* frameIdGenerator) const override { - return makeTraverse( - std::move(outer), - std::move(inner), - inputSlot, - outputSlot, - innerOutputSlot, - sbe::makeE<sbe::EPrimBinary>(sbe::EPrimBinary::logicOr, - sbe::makeE<sbe::EVariable>(outputSlot), - sbe::makeE<sbe::EVariable>(innerOutputSlot)), - sbe::makeE<sbe::EVariable>(outputSlot), - planNodeId, - 1); - } -}; - -/** - * Helper function to create respective 'FilterStateHelper' implementation. If 'trackIndex' is true, - * returns 'IndexStateHelper'. Otherwise, returns 'BooleanStateHelper'. - */ -std::unique_ptr<FilterStateHelper> makeFilterStateHelper(bool trackIndex); - -/** * Creates a balanced boolean binary expression tree from given collection of leaf expression. */ std::unique_ptr<sbe::EExpression> makeBalancedBooleanOpTree( sbe::EPrimBinary::Op logicOp, std::vector<std::unique_ptr<sbe::EExpression>> leaves); -/** - * Creates tree with short-circuiting for OR and AND. Each element in 'braches' argument represents - * logical expression and sub-tree generated for it. - */ -EvalExprStagePair generateShortCircuitingLogicalOp(sbe::EPrimBinary::Op logicOp, - std::vector<EvalExprStagePair> branches, - PlanNodeId planNodeId, - sbe::value::SlotIdGenerator* slotIdGenerator, - optimizer::SlotVarMap& slotVarMap, - const FilterStateHelper& stateHelper); +optimizer::ABT makeBalancedBooleanOpTree(optimizer::Operations logicOp, + std::vector<optimizer::ABT> leaves); + +EvalExpr makeBalancedBooleanOpTree(sbe::EPrimBinary::Op logicOp, + std::vector<EvalExpr> leaves, + optimizer::SlotVarMap& slotVarMap); + /** * Given an index key pattern, and a subset of the fields of the index key pattern that are depended @@ -1534,12 +1236,6 @@ inline auto makeABTConstant(StringData str) { } /** - * Creates a balanced boolean binary expression tree from given collection of leaf expression. - */ -optimizer::ABT makeBalancedBooleanOpTree(optimizer::Operations logicOp, - std::vector<optimizer::ABT> leaves); - -/** * Check if expression returns Nothing and return boolean false if so. Otherwise, return the * expression. */ diff --git a/src/mongo/db/query/sbe_stage_builder_index_scan.cpp b/src/mongo/db/query/sbe_stage_builder_index_scan.cpp index 5f09f619864..801ecdca5dc 100644 --- a/src/mongo/db/query/sbe_stage_builder_index_scan.cpp +++ b/src/mongo/db/query/sbe_stage_builder_index_scan.cpp @@ -717,7 +717,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan( // Determine the set of fields from the index required to apply the filter and union those with // the set of fields from the index required by the parent stage. - auto [indexFilterFieldBitset, indexFilterFieldNames] = [&]() { + auto [filterFieldBitset, filterFields] = [&]() { if (ixn->filter) { DepsTracker tracker; match_expression::addDependencies(ixn->filter.get(), &tracker); @@ -725,7 +725,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan( } return std::make_pair(sbe::IndexKeysInclusionSet{}, std::vector<std::string>{}); }(); - auto fieldBitset = originalFieldBitset | indexFilterFieldBitset; + auto fieldBitset = originalFieldBitset | filterFieldBitset; auto fieldAndSortKeyBitset = fieldBitset | sortKeyBitset; // Add the access method corresponding to 'indexName' to 'iamMap' if needed. @@ -839,21 +839,13 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan( } if (ixn->filter) { - // Relevant slots must include slots for all index keys in case they are needed by parent - // stages (for instance, covered shard filter). - auto relevantSlots = getSlotsToForward(reqs, outputs); - - auto [_, outputStage] = generateFilter(state, - ixn->filter.get(), - {std::move(stage), std::move(relevantSlots)}, - boost::none, - &outputs, - ixn->nodeId(), - indexFilterFieldNames, - true /* useKeySlots */, - false /* trackIndex */); - - stage = outputStage.extractStage(ixn->nodeId()); + const bool isOverIxscan = true; + auto filterExpr = + generateFilter(state, ixn->filter.get(), {}, &outputs, filterFields, isOverIxscan); + if (!filterExpr.isNull()) { + stage = sbe::makeS<sbe::FilterStage<false>>( + std::move(stage), filterExpr.extractExpr(state.slotVarMap), ixn->nodeId()); + } } return {std::move(stage), std::move(outputs)}; @@ -968,7 +960,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScanWith // Determine the set of fields from the index required to apply the filter and union those // with the set of fields from the index required by the parent stage. - auto [indexFilterFieldBitset, indexFilterFieldNames] = [&]() { + auto [filterFieldBitset, filterFields] = [&]() { if (ixn->filter) { DepsTracker tracker; match_expression::addDependencies(ixn->filter.get(), &tracker); @@ -976,7 +968,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScanWith } return std::make_pair(sbe::IndexKeysInclusionSet{}, std::vector<std::string>{}); }(); - auto fieldBitset = originalFieldBitset | indexFilterFieldBitset; + auto fieldBitset = originalFieldBitset | filterFieldBitset; auto fieldAndSortKeyBitset = fieldBitset | sortKeyBitset; auto outputFieldAndSortKeySlots = @@ -1150,21 +1142,13 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScanWith } if (ixn->filter) { - // Relevant slots must include slots for all index keys in case they are needed by parent - // stages (for instance, covered shard filter). - auto relevantSlots = getSlotsToForward(reqs, outputs); - - auto [_, outputStage] = generateFilter(state, - ixn->filter.get(), - {std::move(stage), std::move(relevantSlots)}, - boost::none, - &outputs, - ixn->nodeId(), - indexFilterFieldNames, - true /* useKeySlots */, - false /* trackIndex */); - - stage = outputStage.extractStage(ixn->nodeId()); + const bool isOverIxscan = true; + auto filterExpr = + generateFilter(state, ixn->filter.get(), {}, &outputs, filterFields, isOverIxscan); + if (!filterExpr.isNull()) { + stage = sbe::makeS<sbe::FilterStage<false>>( + std::move(stage), filterExpr.extractExpr(state.slotVarMap), ixn->nodeId()); + } } state.data->indexBoundsEvaluationInfos.emplace_back( diff --git a/src/mongo/db/query/sbe_stage_builder_projection.cpp b/src/mongo/db/query/sbe_stage_builder_projection.cpp index 28c447022d3..5ad0a228d7a 100644 --- a/src/mongo/db/query/sbe_stage_builder_projection.cpp +++ b/src/mongo/db/query/sbe_stage_builder_projection.cpp @@ -84,9 +84,8 @@ struct ProjectionTraversalVisitorContext { EvalExpr getInputEvalExpr() const { return inputExpr.clone(); } - std::unique_ptr<sbe::EExpression> getInputExpr(optimizer::SlotVarMap& slotVarMap) const { - return getInputEvalExpr().extractExpr(slotVarMap); + return inputExpr.getExpr(slotVarMap); } EvalExpr extractInputEvalExpr() { |