summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYoonsoo Kim <yoonsoo.kim@mongodb.com>2022-09-16 14:46:23 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-09-16 21:41:13 +0000
commit52940805f01b2599ed6ed3221094716766e347ac (patch)
tree2a93de92f376e747ecc0523baa45290784b3059e
parent787238e02e9ac372b495874f7047216f13e9f62b (diff)
downloadmongo-52940805f01b2599ed6ed3221094716766e347ac.tar.gz
SERVER-69707 Fix undefined slot error for $group with conditional expression
(cherry picked from commit 8e990303de72e04600770082e41aec92f6221c8b)
-rw-r--r--jstests/noPassthroughWithMongod/group_pushdown.js188
-rw-r--r--src/mongo/db/query/sbe_stage_builder.cpp92
2 files changed, 263 insertions, 17 deletions
diff --git a/jstests/noPassthroughWithMongod/group_pushdown.js b/jstests/noPassthroughWithMongod/group_pushdown.js
index d53176b4a55..a4d6bd4b7f8 100644
--- a/jstests/noPassthroughWithMongod/group_pushdown.js
+++ b/jstests/noPassthroughWithMongod/group_pushdown.js
@@ -229,6 +229,194 @@ assertResultsMatchWithAndWithoutPushdown(
assertResultsMatchWithAndWithoutPushdown(
coll, pipeline, [{_id: "a", ss: 30}, {_id: "b", ss: 60}, {_id: "c", ss: 10}], 2));
+// The second $group stage refers to a top-field below a $switch
+assertResultsMatchWithAndWithoutPushdown(coll,
+ [
+ {$group: {_id: {$divide: ["$price", 5]}}},
+ {
+ $group: {
+ _id: null,
+ lowp: {
+ $sum: {
+ $switch: {
+ branches: [{
+ case: {$lte: ["$_id", 3]},
+ then: 1
+ }],
+ default: 0
+ }
+ }
+ },
+ highp: {
+ $sum: {
+ $switch: {
+ branches: [{
+ case: {$gt: ["$_id", 3]},
+ then: 1
+ }],
+ default: 0
+ }
+ }
+ }
+ }
+ }
+ ],
+ [{"_id": null, "lowp": 2, "highp": 1}],
+ 2);
+
+// The second $group stage refers to a top-field below a $cond
+assertResultsMatchWithAndWithoutPushdown(
+ coll,
+ [
+ {$group: {_id: {$divide: ["$price", 5]}}},
+ {
+ $group: {
+ _id: null,
+ lowp: {$sum: {$cond: [{$lte: ["$_id", 3]}, 1, 0]}},
+ highp: {$sum: {$cond: [{$gt: ["$_id", 3]}, 1, 0]}}
+ }
+ }
+ ],
+ [{"_id": null, "lowp": 2, "highp": 1}],
+ 2);
+
+// The second $group stage refers to a top-field below a nested $cond / $ifNull
+assertResultsMatchWithAndWithoutPushdown(coll,
+ [
+ {$group: {_id: {$divide: ["$price", 5]}}},
+ {
+ $group: {
+ _id: null,
+ lowp: {
+ $sum: {
+ $cond: [
+ {
+ $lte:
+ [{$ifNull: ["$_id", 0]}, 3]
+ },
+ 1,
+ 0
+ ]
+ }
+ },
+ highp: {
+ $sum: {
+ $cond: [
+ {$gt: [{$ifNull: ["$_id", 0]}, 3]},
+ 1,
+ 0
+ ]
+ }
+ }
+ }
+ }
+ ],
+ [{"_id": null, "lowp": 2, "highp": 1}],
+ 2);
+
+// The second $group stage refers to top-fields below a $filter
+assertResultsMatchWithAndWithoutPushdown(
+ coll,
+ [
+ {$group: {_id: "$item", prices: {$push: "$price"}}},
+ {
+ $group: {
+ _id: "$_id",
+ o: {$push: {$filter: {input: "$prices", as: "p", cond: {$gte: ["$$p", 5]}}}}
+ }
+ }
+ ],
+ [{"_id": "a", "o": [[10, 5]]}, {"_id": "b", "o": [[20, 10]]}, {"_id": "c", "o": [[5]]}],
+ 2);
+
+// The second $group stage refers to top-fields below a $let
+assertResultsMatchWithAndWithoutPushdown(
+ coll,
+ [
+ {$group: {
+ _id: "$item", maxp: {$max: "$price"}, minp: {$min: "$price"}, count: {$count: {}}
+ }},
+ {$group: {_id: "$_id", o: {$sum: {
+ $let: {
+ vars: {
+ minPlusMax: {$add: ["$maxp", "$minp"]},
+ count: "$count"
+ },
+ in: {$multiply: ["$$minPlusMax", "$$count"]}
+ }
+ }}}}
+ ],
+ [{ "_id" : "a", "o" : 30 }, { "_id" : "c", "o" : 10 }, { "_id" : "b", "o" : 60 }],
+ 2);
+
+// The second $group stage refers to top-fields below a $and
+assertResultsMatchWithAndWithoutPushdown(coll,
+ [
+ {
+ $group: {
+ _id: "$item",
+ maxp: {$max: "$price"},
+ minp: {$min: "$price"}
+ }
+ },
+ {
+ $group:
+ {
+ _id: "$_id",
+ o:
+ {
+ $sum:
+ {
+ $and: [
+ {$gt: ["$maxp", 15]},
+ {$lt: ["$minp", 10]}
+ ]
+ }
+ }
+ }
+ }
+ ],
+ [
+ {"_id": "a", "o": 0},
+ {"_id": "c", "o": 0},
+ {"_id": "b", "o": 0}
+ ],
+ 2);
+
+// The second $group stage refers to top-fields below a $or
+assertResultsMatchWithAndWithoutPushdown(coll,
+ [
+ {
+ $group: {
+ _id: "$item",
+ maxp: {$max: "$price"},
+ minp: {$min: "$price"}
+ }
+ },
+ {
+ $group:
+ {
+ _id: "$_id",
+ o:
+ {
+ $sum:
+ {
+ $or: [
+ {$gt: ["$maxp", 15]},
+ {$lt: ["$minp", 10]}
+ ]
+ }
+ }
+ }
+ }
+ ],
+ [
+ {"_id": "a", "o": 0},
+ {"_id": "c", "o": 0},
+ {"_id": "b", "o": 0}
+ ],
+ 2);
+
// The second $group stage refers to both a top-level field and a sub-field twice which does not
// exist.
assertResultsMatchWithAndWithoutPushdown(
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index d16cf3ce880..78fee0dc2a0 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -2100,44 +2100,104 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
namespace {
template <typename F>
-struct FieldPathVisitor : public SelectiveConstExpressionVisitorBase {
+struct FieldPathAndCondPreVisitor : public SelectiveConstExpressionVisitorBase {
// To avoid overloaded-virtual warnings.
using SelectiveConstExpressionVisitorBase::visit;
- FieldPathVisitor(const F& fn) : _fn(fn) {}
+ FieldPathAndCondPreVisitor(const F& fn, int32_t& nestedCondLevel)
+ : _fn(fn), _nestedCondLevel(nestedCondLevel) {}
void visit(const ExpressionFieldPath* expr) final {
- _fn(expr);
+ _fn(expr, _nestedCondLevel);
+ }
+
+ void visit(const ExpressionCond* expr) final {
+ ++_nestedCondLevel;
+ }
+
+ void visit(const ExpressionSwitch* expr) final {
+ ++_nestedCondLevel;
+ }
+
+ void visit(const ExpressionIfNull* expr) final {
+ ++_nestedCondLevel;
+ }
+
+ void visit(const ExpressionAnd* expr) final {
+ ++_nestedCondLevel;
+ }
+
+ void visit(const ExpressionOr* expr) final {
+ ++_nestedCondLevel;
}
F _fn;
+ // Tracks the number of conditional expressions like $cond or $ifNull that are above us in the
+ // tree.
+ int32_t& _nestedCondLevel;
+};
+
+struct CondPostVisitor : public SelectiveConstExpressionVisitorBase {
+ // To avoid overloaded-virtual warnings.
+ using SelectiveConstExpressionVisitorBase::visit;
+
+ CondPostVisitor(int32_t& nestedCondLevel) : _nestedCondLevel(nestedCondLevel) {}
+
+ void visit(const ExpressionCond* expr) final {
+ --_nestedCondLevel;
+ }
+
+ void visit(const ExpressionSwitch* expr) final {
+ --_nestedCondLevel;
+ }
+
+ void visit(const ExpressionIfNull* expr) final {
+ --_nestedCondLevel;
+ }
+
+ void visit(const ExpressionAnd* expr) final {
+ --_nestedCondLevel;
+ }
+
+ void visit(const ExpressionOr* expr) final {
+ --_nestedCondLevel;
+ }
+
+ int32_t& _nestedCondLevel;
};
/**
* Walks through the 'expr' expression tree and whenever finds an 'ExpressionFieldPath', calls
* the 'fn' function. Type requirement for 'fn' is it must have a const 'ExpressionFieldPath'
- * pointer parameter.
+ * pointer parameter and 'nestedCondLevel' parameter.
*/
template <typename F>
void walkAndActOnFieldPaths(Expression* expr, const F& fn) {
- FieldPathVisitor<F> visitor(fn);
- ExpressionWalker walker(&visitor, nullptr /*inVisitor*/, nullptr /*postVisitor*/);
+ int32_t nestedCondLevel = 0;
+ FieldPathAndCondPreVisitor<F> preVisitor(fn, nestedCondLevel);
+ CondPostVisitor postVisitor(nestedCondLevel);
+ ExpressionWalker walker(&preVisitor, nullptr /*inVisitor*/, &postVisitor);
expression_walker::walk(expr, &walker);
}
/**
* Checks whether all field paths in 'idExpr' and all accumulator expressions are top-level ones.
*/
-bool checkAllFieldPathsAreTopLevel(const boost::intrusive_ptr<Expression>& idExpr,
- const std::vector<AccumulationStatement>& accStmts) {
- auto areAllTopLevelFields = true;
+bool areAllFieldPathsOptimizable(const boost::intrusive_ptr<Expression>& idExpr,
+ const std::vector<AccumulationStatement>& accStmts) {
+ auto areFieldPathsOptimizable = true;
- auto checkFieldPath = [&](const ExpressionFieldPath* fieldExpr) {
+ auto checkFieldPath = [&](const ExpressionFieldPath* fieldExpr, int32_t nestedCondLevel) {
// We optimize neither a field path for the top-level document itself (getPathLength() == 1)
// nor a field path that refers to a variable. We can optimize only top-level fields
// (getPathLength() == 2).
- if (fieldExpr->getFieldPath().getPathLength() != 2 || fieldExpr->isVariableReference()) {
- areAllTopLevelFields = false;
+ //
+ // The 'nestedCondLevel' being > 0 means that a field path is refered to below conditional
+ // expressions at the parent $group node, when we cannot optimize field path access and
+ // therefore, cannot avoid materialization.
+ if (nestedCondLevel > 0 || fieldExpr->getFieldPath().getPathLength() != 2 ||
+ fieldExpr->isVariableReference()) {
+ areFieldPathsOptimizable = false;
return;
}
};
@@ -2150,7 +2210,7 @@ bool checkAllFieldPathsAreTopLevel(const boost::intrusive_ptr<Expression>& idExp
walkAndActOnFieldPaths(accStmt.expr.argument.get(), checkFieldPath);
}
- return areAllTopLevelFields;
+ return areFieldPathsOptimizable;
}
/**
@@ -2176,7 +2236,7 @@ EvalStage optimizeFieldPaths(StageBuilderState& state,
auto searchInChildOutputs = !optionalRootSlot.has_value();
auto retEvalStage = std::move(childEvalStage);
- walkAndActOnFieldPaths(expr.get(), [&](const ExpressionFieldPath* fieldExpr) {
+ walkAndActOnFieldPaths(expr.get(), [&](const ExpressionFieldPath* fieldExpr, int32_t) {
// We optimize neither a field path for the top-level document itself nor a field path that
// refers to a variable instead of calling getField().
if (fieldExpr->getFieldPath().getPathLength() == 1 || fieldExpr->isVariableReference()) {
@@ -2470,10 +2530,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
const auto& accStmts = groupNode->accumulators;
auto childStageType = childNode->getType();
- auto areAllTopLevelFields = checkAllFieldPathsAreTopLevel(idExpr, accStmts);
-
auto childReqs = reqs.copy();
- if (childStageType == StageType::STAGE_GROUP && areAllTopLevelFields) {
+ if (childStageType == StageType::STAGE_GROUP && areAllFieldPathsOptimizable(idExpr, accStmts)) {
// Does not ask the GROUP child for the result slot to avoid unnecessary materialization if
// all fields are top-level fields. See the end of this function. For example, GROUP - GROUP
// - COLLSCAN case.