summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharlie Swanson <charlie.swanson@mongodb.com>2022-06-06 22:30:53 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-06-08 13:44:59 +0000
commit904122d4b13bdc8382b36081afa47017ba7855dc (patch)
tree9b795f5b5774b520119a463e4cc6d679b84af5e5
parent5c874317f98dbb061fdb98f65f2cc44c096677c1 (diff)
downloadmongo-904122d4b13bdc8382b36081afa47017ba7855dc.tar.gz
SERVER-66812 Fix bug with $group projection analysis
(cherry picked from commit 8a3af130c7dba9936b30e17d25dd52a5802658ad)
-rw-r--r--jstests/aggregation/optimize_away_pipeline.js84
-rw-r--r--jstests/noPassthroughWithMongod/group_pushdown.js13
-rw-r--r--src/mongo/db/query/canonical_query_encoder.cpp5
-rw-r--r--src/mongo/db/query/planner_access.cpp2
-rw-r--r--src/mongo/db/query/planner_analysis.cpp31
-rw-r--r--src/mongo/db/query/projection.cpp4
-rw-r--r--src/mongo/db/query/projection.h4
-rw-r--r--src/mongo/db/query/projection_test.cpp27
-rw-r--r--src/mongo/db/query/query_planner.cpp3
-rw-r--r--src/mongo/db/query/sbe_stage_builder.cpp23
10 files changed, 153 insertions, 43 deletions
diff --git a/jstests/aggregation/optimize_away_pipeline.js b/jstests/aggregation/optimize_away_pipeline.js
index 0f36c8b63c8..cd5ba0a5258 100644
--- a/jstests/aggregation/optimize_away_pipeline.js
+++ b/jstests/aggregation/optimize_away_pipeline.js
@@ -596,6 +596,88 @@ let projStage = getAggPlanStage(explain, "PROJECTION_SIMPLE");
assert.neq(null, projStage, explain);
assertTransformByShape({a: 1, b: 1, _id: 0}, projStage.transformBy, explain);
+// Asserts that, if group pushdown is enabled, we can remove a redundant projection stage before a
+// group.
+function assertProjectionCanBeRemovedBeforeGroup(pipeline) {
+ assertPipelineIfGroupPushdown(
+ // The projection and group should both be pushed down, and we expect to optimize away the
+ // projection after realizing that it will not affect the output of the group.
+ function() {
+ explain = coll.explain().aggregate(pipeline);
+ assertPipelineDoesNotUseAggregation(
+ {pipeline: pipeline, expectedStages: ["COLLSCAN", "GROUP"]});
+ },
+ // If group pushdown is not enabled we still expect the projection to be pushed down.
+ function() {
+ explain = coll.explain().aggregate(pipeline);
+ assertPipelineUsesAggregation({
+ pipeline: pipeline,
+ expectedStages: ["COLLSCAN", "PROJECTION_SIMPLE", "$group"],
+ });
+ });
+}
+
+// Asserts that a projection stage is not optimized out of a pipeline with a projection and a group
+// stage.
+function assertProjectionIsNotRemoved(pipeline) {
+ assertPipelineIfGroupPushdown(
+ // The projection and group should both be pushed down, and we expect NOT to optimize away
+ // the projection.
+ function() {
+ explain = coll.explain().aggregate(pipeline);
+ assertPipelineDoesNotUseAggregation(
+ {pipeline: pipeline, expectedStages: ["COLLSCAN", "PROJECTION_SIMPLE", "GROUP"]});
+ },
+ // If group pushdown is not enabled we still expect the projection to be pushed down.
+ function() {
+ explain = coll.explain().aggregate(pipeline);
+ assertPipelineUsesAggregation({
+ pipeline: pipeline,
+ expectedStages: ["COLLSCAN", "PROJECTION_SIMPLE", "$group"],
+ });
+ });
+}
+
+// Test that an inclusion projection is optimized away if it is redundant/unnecessary.
+assertProjectionCanBeRemovedBeforeGroup(
+ [{$project: {a: 1, b: 1}}, {$group: {_id: "$a", s: {$sum: "$b"}}}]);
+
+// TODO SERVER-66061 Test the same thing for an inclusion projection with a dotted path.
+// assertProjectionCanBeRemovedBeforeGroup(
+// [{$project: {'a.b': 1, 'b.c': 1}}, {$group: {_id: "$a.b", s: {$sum: "$b.c"}}}]);
+
+// Test that an inclusion projection is NOT optimized away if it is NOT redundant. This one fails to
+// include a dependency of the $group and so will have an impact on the query results.
+assertProjectionIsNotRemoved([{$project: {a: 1}}, {$group: {_id: "$a", s: {$sum: "$b"}}}]);
+// TODO SERVER-66061 Analyze similar cases with dotted paths. For now, our ananlysis is limited to
+// PROJECTION_SIMPLE which is not used for any dotted paths.
+
+// TODO SERVER-66061 This one could be removed, but is left for future work.
+assertProjectionIsNotRemoved(
+ [{$project: {a: 1, b: 1}}, {$group: {_id: "$a.b", s: {$sum: "$b.c"}}}]);
+
+// Spinoff on the one above: Without supporting this kind of prefixing analysis, we can confuse
+// ourselves with our dependency analysis. If the $group depends on both "path" and "path.subpath"
+// then it will generate a $project on only "path" to express its dependency set. We then fail to
+// optimize that out.
+pipeline = [{$group: {_id: "$a.b", s: {$first: "$a"}}}];
+assertPipelineIfGroupPushdown(
+ // The group should be pushed down, and we expect NOT to have any projection.
+ // TODO SERVER-66061 we should consider fixing this.
+ function() {
+ explain = coll.explain().aggregate(pipeline);
+ assertPipelineDoesNotUseAggregation(
+ {pipeline: pipeline, expectedStages: ["COLLSCAN", "PROJECTION_SIMPLE", "GROUP"]});
+ },
+ // If group pushdown is not enabled we do expect a projection to be pushed down.
+ function() {
+ explain = coll.explain().aggregate(pipeline);
+ assertPipelineUsesAggregation({
+ pipeline: pipeline,
+ expectedStages: ["COLLSCAN", "PROJECTION_SIMPLE", "$group"],
+ });
+ });
+
// We generate a projection stage from dependency analysis, even if the pipeline begins with an
// exclusion projection.
pipeline = [{$project: {c: 0}}, {$group: {_id: "$a", b: {$sum: "$b"}}}];
@@ -634,7 +716,7 @@ pipeline = [{$project: {x: {$add: ["$a", 1]}}}];
assertPipelineDoesNotUseAggregation(
{pipeline: pipeline, expectedStages: ["COLLSCAN", "PROJECTION_DEFAULT"]});
-pipeline = [{$project: {a: {$add: ["$a", 1]}}}, {$group: {_id: "$a", s: {$sum: "b"}}}];
+pipeline = [{$project: {a: {$add: ["$a", 1]}}}, {$group: {_id: "$a", s: {$sum: "$b"}}}];
assertPipelineIfGroupPushdown(
// Test that a computed projection at the front of the pipeline is pushed down when there's a
// finite dependency set. Additionally, the group pushdown shouldn't erase the computed
diff --git a/jstests/noPassthroughWithMongod/group_pushdown.js b/jstests/noPassthroughWithMongod/group_pushdown.js
index 153bdc4dce2..ba7b51a7b44 100644
--- a/jstests/noPassthroughWithMongod/group_pushdown.js
+++ b/jstests/noPassthroughWithMongod/group_pushdown.js
@@ -277,6 +277,19 @@ assertGroupPushdown(coll,
[{$group: {_id: {"i": "$item"}, s: {$sum: "$price"}}}],
[{_id: {i: "a"}, s: 15}, {_id: {i: "b"}, s: 30}, {_id: {i: "c"}, s: 5}]);
+// Test that we can push down a $group and a projection.
+assertGroupPushdown(
+ coll,
+ [{$project: {_id: 0, item: 1, price: 1}}, {$group: {_id: {i: "$item"}, s: {$sum: "$price"}}}],
+ [{_id: {i: "a"}, s: 15}, {_id: {i: "b"}, s: 30}, {_id: {i: "c"}, s: 5}]);
+
+// Test that the results are as expected if the projection comes first and removes a field that the
+// $group stage needs.
+assertGroupPushdown(
+ coll,
+ [{$project: {_id: 0, item: 1}}, {$group: {_id: {i: "$item"}, s: {$sum: "$price"}}}],
+ [{_id: {i: "a"}, s: 0}, {_id: {i: "b"}, s: 0}, {_id: {i: "c"}, s: 0}]);
+
// Run a group with spilling on and check that $group is pushed down.
assertGroupPushdown(coll,
[{$group: {_id: "$item", s: {$sum: "$price"}}}],
diff --git a/src/mongo/db/query/canonical_query_encoder.cpp b/src/mongo/db/query/canonical_query_encoder.cpp
index 303cdb6009a..ccbcce36635 100644
--- a/src/mongo/db/query/canonical_query_encoder.cpp
+++ b/src/mongo/db/query/canonical_query_encoder.cpp
@@ -584,11 +584,11 @@ void encodeKeyForProj(const projection_ast::Projection* proj, StringBuilder* key
return;
}
- std::vector<std::string> requiredFields = proj->getRequiredFields();
+ std::set<std::string> requiredFields = proj->getRequiredFields();
// If the only requirement is that $sortKey be included with some value, we just act as if the
// entire document is needed.
- if (requiredFields.size() == 1 && requiredFields.front() == "$sortKey") {
+ if (requiredFields.size() == 1 && *requiredFields.begin() == "$sortKey") {
return;
}
@@ -597,7 +597,6 @@ void encodeKeyForProj(const projection_ast::Projection* proj, StringBuilder* key
*keyBuilder << kEncodeProjectionSection;
// Encode the fields required by the projection in order.
- std::sort(requiredFields.begin(), requiredFields.end());
bool isFirst = true;
for (auto&& requiredField : requiredFields) {
invariant(!requiredField.empty());
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index 9e60f455834..dbf0751d60e 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -1173,7 +1173,7 @@ bool isCoveredNullQuery(const CanonicalQuery& query,
// Note that it is not possible to project onto dotted paths of _id here, since they may be
// null or missing, and the index cannot differentiate between the two cases, so we would
// still need a FETCH stage.
- return projFields.size() == 1 && projFields[0] == "_id";
+ return projFields.size() == 1 && *projFields.begin() == "_id";
}
return false;
diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp
index 68cec2de318..4fb673d8bfc 100644
--- a/src/mongo/db/query/planner_analysis.cpp
+++ b/src/mongo/db/query/planner_analysis.cpp
@@ -34,6 +34,7 @@
#include <set>
#include <vector>
+#include "mongo/base/simple_string_data_comparator.h"
#include "mongo/bson/simple_bsonelement_comparator.h"
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/index/expression_params.h"
@@ -336,9 +337,9 @@ void geoSkipValidationOn(const std::set<StringData>& twoDSphereFields,
/**
* If any field is missing from the list of fields the projection wants, we are not covered.
*/
-auto providesAllFields(const vector<std::string>& fields, const QuerySolutionNode& solnRoot) {
- for (size_t i = 0; i < fields.size(); ++i) {
- if (!solnRoot.hasField(fields[i]))
+auto providesAllFields(const std::set<std::string>& fields, const QuerySolutionNode& solnRoot) {
+ for (auto&& field : fields) {
+ if (!solnRoot.hasField(field))
return false;
}
return true;
@@ -554,6 +555,20 @@ bool canUseSimpleSort(const QuerySolutionNode& solnRoot,
!(plannerParams.options & QueryPlannerParams::PRESERVE_RECORD_ID);
}
+/**
+ * Returns true if 'setS' is a non-strict subset of 'setT'.
+ *
+ * The types of the sets are permitted to be different to allow checking something with compatible
+ * but different types e.g. std::set<std::string> and StringDataUnorderedMap.
+ */
+template <typename SetL, typename SetR>
+bool isSubset(const SetL& setL, const SetR& setR) {
+ return setL.size() <= setR.size() &&
+ std::all_of(setL.begin(), setL.end(), [&setR](auto&& lElem) {
+ return setR.find(lElem) != setR.end();
+ });
+}
+
void removeProjectSimpleBelowGroupRecursive(QuerySolutionNode* solnRoot) {
if (solnRoot == nullptr) {
return;
@@ -576,16 +591,12 @@ void removeProjectSimpleBelowGroupRecursive(QuerySolutionNode* solnRoot) {
// projection, it would have type PROJECTION_DEFAULT.
return;
}
-
+ auto* projectNode = checked_cast<ProjectionNodeSimple*>(projectNodeCandidate);
// Check to see if the projectNode's field set is a super set of the groupNodes.
- auto projectNode = static_cast<ProjectionNodeSimple*>(projectNodeCandidate);
- auto projectFields = projectNode->proj.getRequiredFields();
- if (!std::any_of(projectFields.begin(), projectFields.end(), [groupNode](auto&& fld) {
- return groupNode->requiredFields.contains(fld);
- })) {
+ if (!isSubset(groupNode->requiredFields, projectNode->proj.getRequiredFields())) {
// The dependency set of the GROUP stage is wider than the projectNode field set.
return;
- };
+ }
// Attach the projectNode's child to the groupNode's child.
groupNode->children[0] = projectNode->children[0];
diff --git a/src/mongo/db/query/projection.cpp b/src/mongo/db/query/projection.cpp
index 73caaa8f312..3c93367ae8c 100644
--- a/src/mongo/db/query/projection.cpp
+++ b/src/mongo/db/query/projection.cpp
@@ -49,8 +49,8 @@ struct DepsAnalysisData {
fieldDependencyTracker.fields.insert(fieldName);
}
- std::vector<std::string> requiredFields() const {
- return {fieldDependencyTracker.fields.begin(), fieldDependencyTracker.fields.end()};
+ std::set<std::string> requiredFields() const {
+ return fieldDependencyTracker.fields;
}
};
diff --git a/src/mongo/db/query/projection.h b/src/mongo/db/query/projection.h
index 80ede2a7a1a..9987dd641db 100644
--- a/src/mongo/db/query/projection.h
+++ b/src/mongo/db/query/projection.h
@@ -48,7 +48,7 @@ struct ProjectionDependencies {
bool hasExpressions = false;
// Which fields are necessary to perform the projection, or boost::none if all are required.
- boost::optional<std::vector<std::string>> requiredFields;
+ boost::optional<std::set<std::string>> requiredFields;
bool hasDottedPath = false;
@@ -94,7 +94,7 @@ public:
* Return which fields are required to compute the projection, assuming the entire document is
* not needed.
*/
- const std::vector<std::string>& getRequiredFields() const {
+ const std::set<std::string>& getRequiredFields() const {
invariant(_type == ProjectType::kInclusion);
return *_deps.requiredFields;
}
diff --git a/src/mongo/db/query/projection_test.cpp b/src/mongo/db/query/projection_test.cpp
index 35abfb3e92f..1b9bd564104 100644
--- a/src/mongo/db/query/projection_test.cpp
+++ b/src/mongo/db/query/projection_test.cpp
@@ -117,8 +117,9 @@ TEST(QueryProjectionTest, MakeSingleFieldInclusion) {
ASSERT_FALSE(proj.requiresDocument());
const auto& fields = proj.getRequiredFields();
ASSERT_EQUALS(fields.size(), 2U);
- ASSERT_EQUALS(fields[0], "_id");
- ASSERT_EQUALS(fields[1], "a");
+ auto fieldsIt = fields.begin();
+ ASSERT_EQUALS(*fieldsIt++, "_id");
+ ASSERT_EQUALS(*fieldsIt++, "a");
}
TEST(QueryProjectionTest, MakeSingleFieldInclusionNoId) {
@@ -126,7 +127,7 @@ TEST(QueryProjectionTest, MakeSingleFieldInclusionNoId) {
ASSERT_FALSE(proj.requiresDocument());
const auto& fields = proj.getRequiredFields();
ASSERT_EQUALS(fields.size(), 1U);
- ASSERT_EQUALS(fields[0], "a");
+ ASSERT_EQUALS(*fields.begin(), "a");
}
TEST(QueryProjectionTest, MakeSingleFieldId) {
@@ -134,7 +135,7 @@ TEST(QueryProjectionTest, MakeSingleFieldId) {
ASSERT_FALSE(proj.requiresDocument());
const auto& fields = proj.getRequiredFields();
ASSERT_EQUALS(fields.size(), 1U);
- ASSERT_EQUALS(fields[0], "_id");
+ ASSERT_EQUALS(*fields.begin(), "_id");
}
TEST(QueryProjectionTest, MakeSingleFieldNoIdBoolean) {
@@ -142,7 +143,7 @@ TEST(QueryProjectionTest, MakeSingleFieldNoIdBoolean) {
ASSERT_FALSE(proj.requiresDocument());
const auto& fields = proj.getRequiredFields();
ASSERT_EQUALS(fields.size(), 1U);
- ASSERT_EQUALS(fields[0], "a");
+ ASSERT_EQUALS(*fields.begin(), "a");
}
TEST(QueryProjectionTest, MakeSingleFieldFalseIdBoolean) {
@@ -150,7 +151,7 @@ TEST(QueryProjectionTest, MakeSingleFieldFalseIdBoolean) {
ASSERT_FALSE(proj.requiresDocument());
const auto& fields = proj.getRequiredFields();
ASSERT_EQUALS(fields.size(), 1U);
- ASSERT_EQUALS(fields[0], "a");
+ ASSERT_EQUALS(*fields.begin(), "a");
}
//
@@ -504,7 +505,7 @@ TEST(QueryProjectionTest, ProjectionWithExpressionIsNotSimple) {
const auto& fields = proj.getRequiredFields();
ASSERT_EQ(fields.size(), 1);
- ASSERT_EQ(fields[0], "_id");
+ ASSERT_EQ(*fields.begin(), "_id");
}
TEST(QueryProjectionTest, ProjectionWithTopLevelExpressionConstantDoesNotRequireField) {
@@ -513,8 +514,9 @@ TEST(QueryProjectionTest, ProjectionWithTopLevelExpressionConstantDoesNotRequire
const auto& fields = proj.getRequiredFields();
ASSERT_EQ(fields.size(), 2);
- ASSERT_EQ(fields[0], "_id");
- ASSERT_EQ(fields[1], "b");
+ auto fieldsIt = fields.begin();
+ ASSERT_EQ(*fieldsIt++, "_id");
+ ASSERT_EQ(*fieldsIt++, "b");
}
TEST(QueryProjectionTest, ProjectionWithROOTNeedsWholeDocument) {
@@ -530,8 +532,9 @@ TEST(QueryProjectionTest, ProjectionWithFieldPathExpressionDoesNotNeedWholeDocum
const auto& fields = proj.getRequiredFields();
ASSERT_EQ(fields.size(), 2);
- ASSERT_EQ(fields[0], "b");
- ASSERT_EQ(fields[1], "c");
+ auto fieldsIt = fields.begin();
+ ASSERT_EQ(*fieldsIt++, "b");
+ ASSERT_EQ(*fieldsIt++, "c");
}
TEST(QueryProjectionTest, AssignmentToDottedPathRequiresFirstComponent) {
@@ -541,7 +544,7 @@ TEST(QueryProjectionTest, AssignmentToDottedPathRequiresFirstComponent) {
const auto& fields = proj.getRequiredFields();
ASSERT_EQ(fields.size(), 1);
- ASSERT_EQ(fields[0], "a");
+ ASSERT_EQ(*fields.begin(), "a");
}
} // namespace
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index 7b2da1338e6..7b4c144c906 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -177,8 +177,7 @@ std::pair<DepsTracker, DepsTracker> computeDeps(const CanonicalQuery& query) {
outputDeps.needWholeDocument = true;
return {std::move(filterDeps), std::move(outputDeps)};
}
- auto projectionFields = query.getProj()->getRequiredFields();
- outputDeps.fields.insert(projectionFields.begin(), projectionFields.end());
+ outputDeps.fields = query.getProj()->getRequiredFields();
if (auto sortPattern = query.getSortPattern()) {
sortPattern->addDependencies(&outputDeps);
}
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index 9ace5b5e150..aea720a0fda 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -1737,16 +1737,19 @@ SlotBasedStageBuilder::buildProjectionSimple(const QuerySolutionNode* root,
const auto childResult = outputs.get(kResult);
outputs.set(kResult, _slotIdGenerator.generate());
- inputStage = sbe::makeS<sbe::MakeBsonObjStage>(std::move(inputStage),
- outputs.get(kResult),
- childResult,
- sbe::MakeBsonObjStage::FieldBehavior::keep,
- pn->proj.getRequiredFields(),
- std::vector<std::string>{},
- sbe::value::SlotVector{},
- true,
- false,
- root->nodeId());
+ inputStage = sbe::makeS<sbe::MakeBsonObjStage>(
+ std::move(inputStage),
+ outputs.get(kResult),
+ childResult,
+ sbe::MakeBsonObjStage::FieldBehavior::keep,
+ // TODO SERVER-67039 take a set instead of a vector here.
+ std::vector<std::string>{pn->proj.getRequiredFields().begin(),
+ pn->proj.getRequiredFields().end()},
+ std::vector<std::string>{},
+ sbe::value::SlotVector{},
+ true,
+ false,
+ root->nodeId());
return {std::move(inputStage), std::move(outputs)};
}