diff options
author | Yoonsoo Kim <yoonsoo.kim@mongodb.com> | 2022-09-16 14:46:23 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-09-16 21:41:13 +0000 |
commit | 52940805f01b2599ed6ed3221094716766e347ac (patch) | |
tree | 2a93de92f376e747ecc0523baa45290784b3059e | |
parent | 787238e02e9ac372b495874f7047216f13e9f62b (diff) | |
download | mongo-52940805f01b2599ed6ed3221094716766e347ac.tar.gz |
SERVER-69707 Fix undefined slot error for $group with conditional expression
(cherry picked from commit 8e990303de72e04600770082e41aec92f6221c8b)
-rw-r--r-- | jstests/noPassthroughWithMongod/group_pushdown.js | 188 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 92 |
2 files changed, 263 insertions, 17 deletions
diff --git a/jstests/noPassthroughWithMongod/group_pushdown.js b/jstests/noPassthroughWithMongod/group_pushdown.js index d53176b4a55..a4d6bd4b7f8 100644 --- a/jstests/noPassthroughWithMongod/group_pushdown.js +++ b/jstests/noPassthroughWithMongod/group_pushdown.js @@ -229,6 +229,194 @@ assertResultsMatchWithAndWithoutPushdown( assertResultsMatchWithAndWithoutPushdown( coll, pipeline, [{_id: "a", ss: 30}, {_id: "b", ss: 60}, {_id: "c", ss: 10}], 2)); +// The second $group stage refers to a top-field below a $switch +assertResultsMatchWithAndWithoutPushdown(coll, + [ + {$group: {_id: {$divide: ["$price", 5]}}}, + { + $group: { + _id: null, + lowp: { + $sum: { + $switch: { + branches: [{ + case: {$lte: ["$_id", 3]}, + then: 1 + }], + default: 0 + } + } + }, + highp: { + $sum: { + $switch: { + branches: [{ + case: {$gt: ["$_id", 3]}, + then: 1 + }], + default: 0 + } + } + } + } + } + ], + [{"_id": null, "lowp": 2, "highp": 1}], + 2); + +// The second $group stage refers to a top-field below a $cond +assertResultsMatchWithAndWithoutPushdown( + coll, + [ + {$group: {_id: {$divide: ["$price", 5]}}}, + { + $group: { + _id: null, + lowp: {$sum: {$cond: [{$lte: ["$_id", 3]}, 1, 0]}}, + highp: {$sum: {$cond: [{$gt: ["$_id", 3]}, 1, 0]}} + } + } + ], + [{"_id": null, "lowp": 2, "highp": 1}], + 2); + +// The second $group stage refers to a top-field below a nested $cond / $ifNull +assertResultsMatchWithAndWithoutPushdown(coll, + [ + {$group: {_id: {$divide: ["$price", 5]}}}, + { + $group: { + _id: null, + lowp: { + $sum: { + $cond: [ + { + $lte: + [{$ifNull: ["$_id", 0]}, 3] + }, + 1, + 0 + ] + } + }, + highp: { + $sum: { + $cond: [ + {$gt: [{$ifNull: ["$_id", 0]}, 3]}, + 1, + 0 + ] + } + } + } + } + ], + [{"_id": null, "lowp": 2, "highp": 1}], + 2); + +// The second $group stage refers to top-fields below a $filter +assertResultsMatchWithAndWithoutPushdown( + coll, + [ + {$group: {_id: "$item", prices: {$push: "$price"}}}, + { + $group: { + _id: "$_id", + o: {$push: {$filter: {input: "$prices", as: "p", cond: {$gte: ["$$p", 5]}}}} + } + } + ], + [{"_id": "a", "o": [[10, 5]]}, {"_id": "b", "o": [[20, 10]]}, {"_id": "c", "o": [[5]]}], + 2); + +// The second $group stage refers to top-fields below a $let +assertResultsMatchWithAndWithoutPushdown( + coll, + [ + {$group: { + _id: "$item", maxp: {$max: "$price"}, minp: {$min: "$price"}, count: {$count: {}} + }}, + {$group: {_id: "$_id", o: {$sum: { + $let: { + vars: { + minPlusMax: {$add: ["$maxp", "$minp"]}, + count: "$count" + }, + in: {$multiply: ["$$minPlusMax", "$$count"]} + } + }}}} + ], + [{ "_id" : "a", "o" : 30 }, { "_id" : "c", "o" : 10 }, { "_id" : "b", "o" : 60 }], + 2); + +// The second $group stage refers to top-fields below a $and +assertResultsMatchWithAndWithoutPushdown(coll, + [ + { + $group: { + _id: "$item", + maxp: {$max: "$price"}, + minp: {$min: "$price"} + } + }, + { + $group: + { + _id: "$_id", + o: + { + $sum: + { + $and: [ + {$gt: ["$maxp", 15]}, + {$lt: ["$minp", 10]} + ] + } + } + } + } + ], + [ + {"_id": "a", "o": 0}, + {"_id": "c", "o": 0}, + {"_id": "b", "o": 0} + ], + 2); + +// The second $group stage refers to top-fields below a $or +assertResultsMatchWithAndWithoutPushdown(coll, + [ + { + $group: { + _id: "$item", + maxp: {$max: "$price"}, + minp: {$min: "$price"} + } + }, + { + $group: + { + _id: "$_id", + o: + { + $sum: + { + $or: [ + {$gt: ["$maxp", 15]}, + {$lt: ["$minp", 10]} + ] + } + } + } + } + ], + [ + {"_id": "a", "o": 0}, + {"_id": "c", "o": 0}, + {"_id": "b", "o": 0} + ], + 2); + // The second $group stage refers to both a top-level field and a sub-field twice which does not // exist. assertResultsMatchWithAndWithoutPushdown( diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index d16cf3ce880..78fee0dc2a0 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -2100,44 +2100,104 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder namespace { template <typename F> -struct FieldPathVisitor : public SelectiveConstExpressionVisitorBase { +struct FieldPathAndCondPreVisitor : public SelectiveConstExpressionVisitorBase { // To avoid overloaded-virtual warnings. using SelectiveConstExpressionVisitorBase::visit; - FieldPathVisitor(const F& fn) : _fn(fn) {} + FieldPathAndCondPreVisitor(const F& fn, int32_t& nestedCondLevel) + : _fn(fn), _nestedCondLevel(nestedCondLevel) {} void visit(const ExpressionFieldPath* expr) final { - _fn(expr); + _fn(expr, _nestedCondLevel); + } + + void visit(const ExpressionCond* expr) final { + ++_nestedCondLevel; + } + + void visit(const ExpressionSwitch* expr) final { + ++_nestedCondLevel; + } + + void visit(const ExpressionIfNull* expr) final { + ++_nestedCondLevel; + } + + void visit(const ExpressionAnd* expr) final { + ++_nestedCondLevel; + } + + void visit(const ExpressionOr* expr) final { + ++_nestedCondLevel; } F _fn; + // Tracks the number of conditional expressions like $cond or $ifNull that are above us in the + // tree. + int32_t& _nestedCondLevel; +}; + +struct CondPostVisitor : public SelectiveConstExpressionVisitorBase { + // To avoid overloaded-virtual warnings. + using SelectiveConstExpressionVisitorBase::visit; + + CondPostVisitor(int32_t& nestedCondLevel) : _nestedCondLevel(nestedCondLevel) {} + + void visit(const ExpressionCond* expr) final { + --_nestedCondLevel; + } + + void visit(const ExpressionSwitch* expr) final { + --_nestedCondLevel; + } + + void visit(const ExpressionIfNull* expr) final { + --_nestedCondLevel; + } + + void visit(const ExpressionAnd* expr) final { + --_nestedCondLevel; + } + + void visit(const ExpressionOr* expr) final { + --_nestedCondLevel; + } + + int32_t& _nestedCondLevel; }; /** * Walks through the 'expr' expression tree and whenever finds an 'ExpressionFieldPath', calls * the 'fn' function. Type requirement for 'fn' is it must have a const 'ExpressionFieldPath' - * pointer parameter. + * pointer parameter and 'nestedCondLevel' parameter. */ template <typename F> void walkAndActOnFieldPaths(Expression* expr, const F& fn) { - FieldPathVisitor<F> visitor(fn); - ExpressionWalker walker(&visitor, nullptr /*inVisitor*/, nullptr /*postVisitor*/); + int32_t nestedCondLevel = 0; + FieldPathAndCondPreVisitor<F> preVisitor(fn, nestedCondLevel); + CondPostVisitor postVisitor(nestedCondLevel); + ExpressionWalker walker(&preVisitor, nullptr /*inVisitor*/, &postVisitor); expression_walker::walk(expr, &walker); } /** * Checks whether all field paths in 'idExpr' and all accumulator expressions are top-level ones. */ -bool checkAllFieldPathsAreTopLevel(const boost::intrusive_ptr<Expression>& idExpr, - const std::vector<AccumulationStatement>& accStmts) { - auto areAllTopLevelFields = true; +bool areAllFieldPathsOptimizable(const boost::intrusive_ptr<Expression>& idExpr, + const std::vector<AccumulationStatement>& accStmts) { + auto areFieldPathsOptimizable = true; - auto checkFieldPath = [&](const ExpressionFieldPath* fieldExpr) { + auto checkFieldPath = [&](const ExpressionFieldPath* fieldExpr, int32_t nestedCondLevel) { // We optimize neither a field path for the top-level document itself (getPathLength() == 1) // nor a field path that refers to a variable. We can optimize only top-level fields // (getPathLength() == 2). - if (fieldExpr->getFieldPath().getPathLength() != 2 || fieldExpr->isVariableReference()) { - areAllTopLevelFields = false; + // + // The 'nestedCondLevel' being > 0 means that a field path is refered to below conditional + // expressions at the parent $group node, when we cannot optimize field path access and + // therefore, cannot avoid materialization. + if (nestedCondLevel > 0 || fieldExpr->getFieldPath().getPathLength() != 2 || + fieldExpr->isVariableReference()) { + areFieldPathsOptimizable = false; return; } }; @@ -2150,7 +2210,7 @@ bool checkAllFieldPathsAreTopLevel(const boost::intrusive_ptr<Expression>& idExp walkAndActOnFieldPaths(accStmt.expr.argument.get(), checkFieldPath); } - return areAllTopLevelFields; + return areFieldPathsOptimizable; } /** @@ -2176,7 +2236,7 @@ EvalStage optimizeFieldPaths(StageBuilderState& state, auto searchInChildOutputs = !optionalRootSlot.has_value(); auto retEvalStage = std::move(childEvalStage); - walkAndActOnFieldPaths(expr.get(), [&](const ExpressionFieldPath* fieldExpr) { + walkAndActOnFieldPaths(expr.get(), [&](const ExpressionFieldPath* fieldExpr, int32_t) { // We optimize neither a field path for the top-level document itself nor a field path that // refers to a variable instead of calling getField(). if (fieldExpr->getFieldPath().getPathLength() == 1 || fieldExpr->isVariableReference()) { @@ -2470,10 +2530,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder const auto& accStmts = groupNode->accumulators; auto childStageType = childNode->getType(); - auto areAllTopLevelFields = checkAllFieldPathsAreTopLevel(idExpr, accStmts); - auto childReqs = reqs.copy(); - if (childStageType == StageType::STAGE_GROUP && areAllTopLevelFields) { + if (childStageType == StageType::STAGE_GROUP && areAllFieldPathsOptimizable(idExpr, accStmts)) { // Does not ask the GROUP child for the result slot to avoid unnecessary materialization if // all fields are top-level fields. See the end of this function. For example, GROUP - GROUP // - COLLSCAN case. |