diff options
author | Charlie Swanson <charlie.swanson@mongodb.com> | 2022-06-06 22:30:53 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-06-08 13:44:59 +0000 |
commit | 904122d4b13bdc8382b36081afa47017ba7855dc (patch) | |
tree | 9b795f5b5774b520119a463e4cc6d679b84af5e5 | |
parent | 5c874317f98dbb061fdb98f65f2cc44c096677c1 (diff) | |
download | mongo-904122d4b13bdc8382b36081afa47017ba7855dc.tar.gz |
SERVER-66812 Fix bug with $group projection analysis
(cherry picked from commit 8a3af130c7dba9936b30e17d25dd52a5802658ad)
-rw-r--r-- | jstests/aggregation/optimize_away_pipeline.js | 84 | ||||
-rw-r--r-- | jstests/noPassthroughWithMongod/group_pushdown.js | 13 | ||||
-rw-r--r-- | src/mongo/db/query/canonical_query_encoder.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/query/planner_access.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/query/planner_analysis.cpp | 31 | ||||
-rw-r--r-- | src/mongo/db/query/projection.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/query/projection.h | 4 | ||||
-rw-r--r-- | src/mongo/db/query/projection_test.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 23 |
10 files changed, 153 insertions, 43 deletions
diff --git a/jstests/aggregation/optimize_away_pipeline.js b/jstests/aggregation/optimize_away_pipeline.js index 0f36c8b63c8..cd5ba0a5258 100644 --- a/jstests/aggregation/optimize_away_pipeline.js +++ b/jstests/aggregation/optimize_away_pipeline.js @@ -596,6 +596,88 @@ let projStage = getAggPlanStage(explain, "PROJECTION_SIMPLE"); assert.neq(null, projStage, explain); assertTransformByShape({a: 1, b: 1, _id: 0}, projStage.transformBy, explain); +// Asserts that, if group pushdown is enabled, we can remove a redundant projection stage before a +// group. +function assertProjectionCanBeRemovedBeforeGroup(pipeline) { + assertPipelineIfGroupPushdown( + // The projection and group should both be pushed down, and we expect to optimize away the + // projection after realizing that it will not affect the output of the group. + function() { + explain = coll.explain().aggregate(pipeline); + assertPipelineDoesNotUseAggregation( + {pipeline: pipeline, expectedStages: ["COLLSCAN", "GROUP"]}); + }, + // If group pushdown is not enabled we still expect the projection to be pushed down. + function() { + explain = coll.explain().aggregate(pipeline); + assertPipelineUsesAggregation({ + pipeline: pipeline, + expectedStages: ["COLLSCAN", "PROJECTION_SIMPLE", "$group"], + }); + }); +} + +// Asserts that a projection stage is not optimized out of a pipeline with a projection and a group +// stage. +function assertProjectionIsNotRemoved(pipeline) { + assertPipelineIfGroupPushdown( + // The projection and group should both be pushed down, and we expect NOT to optimize away + // the projection. + function() { + explain = coll.explain().aggregate(pipeline); + assertPipelineDoesNotUseAggregation( + {pipeline: pipeline, expectedStages: ["COLLSCAN", "PROJECTION_SIMPLE", "GROUP"]}); + }, + // If group pushdown is not enabled we still expect the projection to be pushed down. + function() { + explain = coll.explain().aggregate(pipeline); + assertPipelineUsesAggregation({ + pipeline: pipeline, + expectedStages: ["COLLSCAN", "PROJECTION_SIMPLE", "$group"], + }); + }); +} + +// Test that an inclusion projection is optimized away if it is redundant/unnecessary. +assertProjectionCanBeRemovedBeforeGroup( + [{$project: {a: 1, b: 1}}, {$group: {_id: "$a", s: {$sum: "$b"}}}]); + +// TODO SERVER-66061 Test the same thing for an inclusion projection with a dotted path. +// assertProjectionCanBeRemovedBeforeGroup( +// [{$project: {'a.b': 1, 'b.c': 1}}, {$group: {_id: "$a.b", s: {$sum: "$b.c"}}}]); + +// Test that an inclusion projection is NOT optimized away if it is NOT redundant. This one fails to +// include a dependency of the $group and so will have an impact on the query results. +assertProjectionIsNotRemoved([{$project: {a: 1}}, {$group: {_id: "$a", s: {$sum: "$b"}}}]); +// TODO SERVER-66061 Analyze similar cases with dotted paths. For now, our ananlysis is limited to +// PROJECTION_SIMPLE which is not used for any dotted paths. + +// TODO SERVER-66061 This one could be removed, but is left for future work. +assertProjectionIsNotRemoved( + [{$project: {a: 1, b: 1}}, {$group: {_id: "$a.b", s: {$sum: "$b.c"}}}]); + +// Spinoff on the one above: Without supporting this kind of prefixing analysis, we can confuse +// ourselves with our dependency analysis. If the $group depends on both "path" and "path.subpath" +// then it will generate a $project on only "path" to express its dependency set. We then fail to +// optimize that out. +pipeline = [{$group: {_id: "$a.b", s: {$first: "$a"}}}]; +assertPipelineIfGroupPushdown( + // The group should be pushed down, and we expect NOT to have any projection. + // TODO SERVER-66061 we should consider fixing this. + function() { + explain = coll.explain().aggregate(pipeline); + assertPipelineDoesNotUseAggregation( + {pipeline: pipeline, expectedStages: ["COLLSCAN", "PROJECTION_SIMPLE", "GROUP"]}); + }, + // If group pushdown is not enabled we do expect a projection to be pushed down. + function() { + explain = coll.explain().aggregate(pipeline); + assertPipelineUsesAggregation({ + pipeline: pipeline, + expectedStages: ["COLLSCAN", "PROJECTION_SIMPLE", "$group"], + }); + }); + // We generate a projection stage from dependency analysis, even if the pipeline begins with an // exclusion projection. pipeline = [{$project: {c: 0}}, {$group: {_id: "$a", b: {$sum: "$b"}}}]; @@ -634,7 +716,7 @@ pipeline = [{$project: {x: {$add: ["$a", 1]}}}]; assertPipelineDoesNotUseAggregation( {pipeline: pipeline, expectedStages: ["COLLSCAN", "PROJECTION_DEFAULT"]}); -pipeline = [{$project: {a: {$add: ["$a", 1]}}}, {$group: {_id: "$a", s: {$sum: "b"}}}]; +pipeline = [{$project: {a: {$add: ["$a", 1]}}}, {$group: {_id: "$a", s: {$sum: "$b"}}}]; assertPipelineIfGroupPushdown( // Test that a computed projection at the front of the pipeline is pushed down when there's a // finite dependency set. Additionally, the group pushdown shouldn't erase the computed diff --git a/jstests/noPassthroughWithMongod/group_pushdown.js b/jstests/noPassthroughWithMongod/group_pushdown.js index 153bdc4dce2..ba7b51a7b44 100644 --- a/jstests/noPassthroughWithMongod/group_pushdown.js +++ b/jstests/noPassthroughWithMongod/group_pushdown.js @@ -277,6 +277,19 @@ assertGroupPushdown(coll, [{$group: {_id: {"i": "$item"}, s: {$sum: "$price"}}}], [{_id: {i: "a"}, s: 15}, {_id: {i: "b"}, s: 30}, {_id: {i: "c"}, s: 5}]); +// Test that we can push down a $group and a projection. +assertGroupPushdown( + coll, + [{$project: {_id: 0, item: 1, price: 1}}, {$group: {_id: {i: "$item"}, s: {$sum: "$price"}}}], + [{_id: {i: "a"}, s: 15}, {_id: {i: "b"}, s: 30}, {_id: {i: "c"}, s: 5}]); + +// Test that the results are as expected if the projection comes first and removes a field that the +// $group stage needs. +assertGroupPushdown( + coll, + [{$project: {_id: 0, item: 1}}, {$group: {_id: {i: "$item"}, s: {$sum: "$price"}}}], + [{_id: {i: "a"}, s: 0}, {_id: {i: "b"}, s: 0}, {_id: {i: "c"}, s: 0}]); + // Run a group with spilling on and check that $group is pushed down. assertGroupPushdown(coll, [{$group: {_id: "$item", s: {$sum: "$price"}}}], diff --git a/src/mongo/db/query/canonical_query_encoder.cpp b/src/mongo/db/query/canonical_query_encoder.cpp index 303cdb6009a..ccbcce36635 100644 --- a/src/mongo/db/query/canonical_query_encoder.cpp +++ b/src/mongo/db/query/canonical_query_encoder.cpp @@ -584,11 +584,11 @@ void encodeKeyForProj(const projection_ast::Projection* proj, StringBuilder* key return; } - std::vector<std::string> requiredFields = proj->getRequiredFields(); + std::set<std::string> requiredFields = proj->getRequiredFields(); // If the only requirement is that $sortKey be included with some value, we just act as if the // entire document is needed. - if (requiredFields.size() == 1 && requiredFields.front() == "$sortKey") { + if (requiredFields.size() == 1 && *requiredFields.begin() == "$sortKey") { return; } @@ -597,7 +597,6 @@ void encodeKeyForProj(const projection_ast::Projection* proj, StringBuilder* key *keyBuilder << kEncodeProjectionSection; // Encode the fields required by the projection in order. - std::sort(requiredFields.begin(), requiredFields.end()); bool isFirst = true; for (auto&& requiredField : requiredFields) { invariant(!requiredField.empty()); diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index 9e60f455834..dbf0751d60e 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -1173,7 +1173,7 @@ bool isCoveredNullQuery(const CanonicalQuery& query, // Note that it is not possible to project onto dotted paths of _id here, since they may be // null or missing, and the index cannot differentiate between the two cases, so we would // still need a FETCH stage. - return projFields.size() == 1 && projFields[0] == "_id"; + return projFields.size() == 1 && *projFields.begin() == "_id"; } return false; diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp index 68cec2de318..4fb673d8bfc 100644 --- a/src/mongo/db/query/planner_analysis.cpp +++ b/src/mongo/db/query/planner_analysis.cpp @@ -34,6 +34,7 @@ #include <set> #include <vector> +#include "mongo/base/simple_string_data_comparator.h" #include "mongo/bson/simple_bsonelement_comparator.h" #include "mongo/db/bson/dotted_path_support.h" #include "mongo/db/index/expression_params.h" @@ -336,9 +337,9 @@ void geoSkipValidationOn(const std::set<StringData>& twoDSphereFields, /** * If any field is missing from the list of fields the projection wants, we are not covered. */ -auto providesAllFields(const vector<std::string>& fields, const QuerySolutionNode& solnRoot) { - for (size_t i = 0; i < fields.size(); ++i) { - if (!solnRoot.hasField(fields[i])) +auto providesAllFields(const std::set<std::string>& fields, const QuerySolutionNode& solnRoot) { + for (auto&& field : fields) { + if (!solnRoot.hasField(field)) return false; } return true; @@ -554,6 +555,20 @@ bool canUseSimpleSort(const QuerySolutionNode& solnRoot, !(plannerParams.options & QueryPlannerParams::PRESERVE_RECORD_ID); } +/** + * Returns true if 'setS' is a non-strict subset of 'setT'. + * + * The types of the sets are permitted to be different to allow checking something with compatible + * but different types e.g. std::set<std::string> and StringDataUnorderedMap. + */ +template <typename SetL, typename SetR> +bool isSubset(const SetL& setL, const SetR& setR) { + return setL.size() <= setR.size() && + std::all_of(setL.begin(), setL.end(), [&setR](auto&& lElem) { + return setR.find(lElem) != setR.end(); + }); +} + void removeProjectSimpleBelowGroupRecursive(QuerySolutionNode* solnRoot) { if (solnRoot == nullptr) { return; @@ -576,16 +591,12 @@ void removeProjectSimpleBelowGroupRecursive(QuerySolutionNode* solnRoot) { // projection, it would have type PROJECTION_DEFAULT. return; } - + auto* projectNode = checked_cast<ProjectionNodeSimple*>(projectNodeCandidate); // Check to see if the projectNode's field set is a super set of the groupNodes. - auto projectNode = static_cast<ProjectionNodeSimple*>(projectNodeCandidate); - auto projectFields = projectNode->proj.getRequiredFields(); - if (!std::any_of(projectFields.begin(), projectFields.end(), [groupNode](auto&& fld) { - return groupNode->requiredFields.contains(fld); - })) { + if (!isSubset(groupNode->requiredFields, projectNode->proj.getRequiredFields())) { // The dependency set of the GROUP stage is wider than the projectNode field set. return; - }; + } // Attach the projectNode's child to the groupNode's child. groupNode->children[0] = projectNode->children[0]; diff --git a/src/mongo/db/query/projection.cpp b/src/mongo/db/query/projection.cpp index 73caaa8f312..3c93367ae8c 100644 --- a/src/mongo/db/query/projection.cpp +++ b/src/mongo/db/query/projection.cpp @@ -49,8 +49,8 @@ struct DepsAnalysisData { fieldDependencyTracker.fields.insert(fieldName); } - std::vector<std::string> requiredFields() const { - return {fieldDependencyTracker.fields.begin(), fieldDependencyTracker.fields.end()}; + std::set<std::string> requiredFields() const { + return fieldDependencyTracker.fields; } }; diff --git a/src/mongo/db/query/projection.h b/src/mongo/db/query/projection.h index 80ede2a7a1a..9987dd641db 100644 --- a/src/mongo/db/query/projection.h +++ b/src/mongo/db/query/projection.h @@ -48,7 +48,7 @@ struct ProjectionDependencies { bool hasExpressions = false; // Which fields are necessary to perform the projection, or boost::none if all are required. - boost::optional<std::vector<std::string>> requiredFields; + boost::optional<std::set<std::string>> requiredFields; bool hasDottedPath = false; @@ -94,7 +94,7 @@ public: * Return which fields are required to compute the projection, assuming the entire document is * not needed. */ - const std::vector<std::string>& getRequiredFields() const { + const std::set<std::string>& getRequiredFields() const { invariant(_type == ProjectType::kInclusion); return *_deps.requiredFields; } diff --git a/src/mongo/db/query/projection_test.cpp b/src/mongo/db/query/projection_test.cpp index 35abfb3e92f..1b9bd564104 100644 --- a/src/mongo/db/query/projection_test.cpp +++ b/src/mongo/db/query/projection_test.cpp @@ -117,8 +117,9 @@ TEST(QueryProjectionTest, MakeSingleFieldInclusion) { ASSERT_FALSE(proj.requiresDocument()); const auto& fields = proj.getRequiredFields(); ASSERT_EQUALS(fields.size(), 2U); - ASSERT_EQUALS(fields[0], "_id"); - ASSERT_EQUALS(fields[1], "a"); + auto fieldsIt = fields.begin(); + ASSERT_EQUALS(*fieldsIt++, "_id"); + ASSERT_EQUALS(*fieldsIt++, "a"); } TEST(QueryProjectionTest, MakeSingleFieldInclusionNoId) { @@ -126,7 +127,7 @@ TEST(QueryProjectionTest, MakeSingleFieldInclusionNoId) { ASSERT_FALSE(proj.requiresDocument()); const auto& fields = proj.getRequiredFields(); ASSERT_EQUALS(fields.size(), 1U); - ASSERT_EQUALS(fields[0], "a"); + ASSERT_EQUALS(*fields.begin(), "a"); } TEST(QueryProjectionTest, MakeSingleFieldId) { @@ -134,7 +135,7 @@ TEST(QueryProjectionTest, MakeSingleFieldId) { ASSERT_FALSE(proj.requiresDocument()); const auto& fields = proj.getRequiredFields(); ASSERT_EQUALS(fields.size(), 1U); - ASSERT_EQUALS(fields[0], "_id"); + ASSERT_EQUALS(*fields.begin(), "_id"); } TEST(QueryProjectionTest, MakeSingleFieldNoIdBoolean) { @@ -142,7 +143,7 @@ TEST(QueryProjectionTest, MakeSingleFieldNoIdBoolean) { ASSERT_FALSE(proj.requiresDocument()); const auto& fields = proj.getRequiredFields(); ASSERT_EQUALS(fields.size(), 1U); - ASSERT_EQUALS(fields[0], "a"); + ASSERT_EQUALS(*fields.begin(), "a"); } TEST(QueryProjectionTest, MakeSingleFieldFalseIdBoolean) { @@ -150,7 +151,7 @@ TEST(QueryProjectionTest, MakeSingleFieldFalseIdBoolean) { ASSERT_FALSE(proj.requiresDocument()); const auto& fields = proj.getRequiredFields(); ASSERT_EQUALS(fields.size(), 1U); - ASSERT_EQUALS(fields[0], "a"); + ASSERT_EQUALS(*fields.begin(), "a"); } // @@ -504,7 +505,7 @@ TEST(QueryProjectionTest, ProjectionWithExpressionIsNotSimple) { const auto& fields = proj.getRequiredFields(); ASSERT_EQ(fields.size(), 1); - ASSERT_EQ(fields[0], "_id"); + ASSERT_EQ(*fields.begin(), "_id"); } TEST(QueryProjectionTest, ProjectionWithTopLevelExpressionConstantDoesNotRequireField) { @@ -513,8 +514,9 @@ TEST(QueryProjectionTest, ProjectionWithTopLevelExpressionConstantDoesNotRequire const auto& fields = proj.getRequiredFields(); ASSERT_EQ(fields.size(), 2); - ASSERT_EQ(fields[0], "_id"); - ASSERT_EQ(fields[1], "b"); + auto fieldsIt = fields.begin(); + ASSERT_EQ(*fieldsIt++, "_id"); + ASSERT_EQ(*fieldsIt++, "b"); } TEST(QueryProjectionTest, ProjectionWithROOTNeedsWholeDocument) { @@ -530,8 +532,9 @@ TEST(QueryProjectionTest, ProjectionWithFieldPathExpressionDoesNotNeedWholeDocum const auto& fields = proj.getRequiredFields(); ASSERT_EQ(fields.size(), 2); - ASSERT_EQ(fields[0], "b"); - ASSERT_EQ(fields[1], "c"); + auto fieldsIt = fields.begin(); + ASSERT_EQ(*fieldsIt++, "b"); + ASSERT_EQ(*fieldsIt++, "c"); } TEST(QueryProjectionTest, AssignmentToDottedPathRequiresFirstComponent) { @@ -541,7 +544,7 @@ TEST(QueryProjectionTest, AssignmentToDottedPathRequiresFirstComponent) { const auto& fields = proj.getRequiredFields(); ASSERT_EQ(fields.size(), 1); - ASSERT_EQ(fields[0], "a"); + ASSERT_EQ(*fields.begin(), "a"); } } // namespace diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 7b2da1338e6..7b4c144c906 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -177,8 +177,7 @@ std::pair<DepsTracker, DepsTracker> computeDeps(const CanonicalQuery& query) { outputDeps.needWholeDocument = true; return {std::move(filterDeps), std::move(outputDeps)}; } - auto projectionFields = query.getProj()->getRequiredFields(); - outputDeps.fields.insert(projectionFields.begin(), projectionFields.end()); + outputDeps.fields = query.getProj()->getRequiredFields(); if (auto sortPattern = query.getSortPattern()) { sortPattern->addDependencies(&outputDeps); } diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index 9ace5b5e150..aea720a0fda 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -1737,16 +1737,19 @@ SlotBasedStageBuilder::buildProjectionSimple(const QuerySolutionNode* root, const auto childResult = outputs.get(kResult); outputs.set(kResult, _slotIdGenerator.generate()); - inputStage = sbe::makeS<sbe::MakeBsonObjStage>(std::move(inputStage), - outputs.get(kResult), - childResult, - sbe::MakeBsonObjStage::FieldBehavior::keep, - pn->proj.getRequiredFields(), - std::vector<std::string>{}, - sbe::value::SlotVector{}, - true, - false, - root->nodeId()); + inputStage = sbe::makeS<sbe::MakeBsonObjStage>( + std::move(inputStage), + outputs.get(kResult), + childResult, + sbe::MakeBsonObjStage::FieldBehavior::keep, + // TODO SERVER-67039 take a set instead of a vector here. + std::vector<std::string>{pn->proj.getRequiredFields().begin(), + pn->proj.getRequiredFields().end()}, + std::vector<std::string>{}, + sbe::value::SlotVector{}, + true, + false, + root->nodeId()); return {std::move(inputStage), std::move(outputs)}; } |