diff options
author | David Storch <david.storch@mongodb.com> | 2019-10-22 21:32:40 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-10-22 21:32:40 +0000 |
commit | 8d048a3bb2f0f2f81cf99ce76ff21112bf3963d6 (patch) | |
tree | 8ac057129946e378c53730914e15d366ca0b0c92 | |
parent | 0a5b8a92ed440f9dbc35d8e3d22cde43fab164f6 (diff) | |
download | mongo-8d048a3bb2f0f2f81cf99ce76ff21112bf3963d6.tar.gz |
SERVER-7568 Push $sort into PlanStage layer even for blocking SORT plans.
This change results in the multi-planning mechanism
evaluating both non-blocking and blocking plans for the
$sort when possible. The system should no longer select a
non-blocking plan when a plan with a SORT stage is superior.
32 files changed, 526 insertions, 500 deletions
diff --git a/jstests/aggregation/bugs/server22093.js b/jstests/aggregation/bugs/server22093.js index 618c65f85b7..d119b928a28 100644 --- a/jstests/aggregation/bugs/server22093.js +++ b/jstests/aggregation/bugs/server22093.js @@ -49,4 +49,29 @@ assert(planHasStage(db, explained.stages[0].$cursor.queryPlanner.winningPlan, "C // A $match that is not a single range cannot use the COUNT_SCAN optimization. explained = coll.explain().aggregate([{$match: {foo: {$in: [0, 1]}}}, {$count: "count"}]); assert(!planHasStage(db, explained.stages[0].$cursor.queryPlanner.winningPlan, "COUNT_SCAN")); + +// Test that COUNT_SCAN can be used when there is a $sort. +explained = coll.explain().aggregate([{$sort: {foo: 1}}, {$count: "count"}]); +assert(aggPlanHasStage(explained, "COUNT_SCAN"), explained); + +// Test that a forward COUNT_SCAN plan is chosen even when there is a $sort in the direction +// opposite that of the index. +explained = coll.explain().aggregate([{$sort: {foo: -1}}, {$count: "count"}]); +let countScan = getAggPlanStage(explained, "COUNT_SCAN"); +assert.neq(null, countScan, explained); +assert.eq({foo: MinKey}, countScan.indexBounds.startKey, explained); +assert.eq(true, countScan.indexBounds.startKeyInclusive, explained); +assert.eq({foo: MaxKey}, countScan.indexBounds.endKey, explained); +assert.eq(true, countScan.indexBounds.endKeyInclusive, explained); + +// Test that the inclusivity/exclusivity of the index bounds for COUNT_SCAN are correct when there +// is a $sort in the opposite direction of the index. +explained = coll.explain().aggregate( + [{$match: {foo: {$gte: 0, $lt: 10}}}, {$sort: {foo: -1}}, {$count: "count"}]); +countScan = getAggPlanStage(explained, "COUNT_SCAN"); +assert.neq(null, countScan, explained); +assert.eq({foo: 0}, countScan.indexBounds.startKey, explained); +assert.eq(true, countScan.indexBounds.startKeyInclusive, explained); +assert.eq({foo: 10}, countScan.indexBounds.endKey, explained); +assert.eq(false, countScan.indexBounds.endKeyInclusive, explained); }()); diff --git a/jstests/aggregation/bugs/skip_limit_overflow.js b/jstests/aggregation/bugs/skip_limit_overflow.js index 597518d069b..0b1fa027a48 100644 --- a/jstests/aggregation/bugs/skip_limit_overflow.js +++ b/jstests/aggregation/bugs/skip_limit_overflow.js @@ -19,10 +19,6 @@ assert.commandWorked(db.runCommand({create: coll.getName()})); function testPipeline(pipeline, expectedResult, optimizedAwayStages) { const explainOutput = coll.explain().aggregate(pipeline); - assert(explainOutput.hasOwnProperty("stages"), - "Expected pipeline " + tojsononeline(pipeline) + - " to use an aggregation framework in the explain output: " + tojson(explainOutput)); - if (optimizedAwayStages) { optimizedAwayStages.forEach( (stage) => @@ -31,17 +27,21 @@ function testPipeline(pipeline, expectedResult, optimizedAwayStages) { stage + " stage in the explain output: " + tojson(explainOutput))); } - for (let path in expectedResult) { - const subPaths = path.split("."); - const stageName = subPaths[0]; + for (const stageName in expectedResult) { + const path = expectedResult[stageName].path; + const expectedValue = expectedResult[stageName].expectedValue; + const stages = getAggPlanStages(explainOutput, stageName); assert(stages !== null, "Expected pipeline " + tojsononeline(pipeline) + " to include a " + stageName + " stage in the explain output: " + tojson(explainOutput)); - assert(stages.length == expectedResult[path].length, - "Expected pipeline " + tojsononeline(pipeline) + " to include " + - expectedResult[path].length + stageName + - " stages in the explain output: " + tojson(explainOutput)); + assert.eq(stages.length, + expectedValue.length, + "Expected pipeline " + tojsononeline(pipeline) + " to include " + + expectedValue.length + " " + stageName + + " stages in the explain output: " + tojson(explainOutput)); + + const subPaths = path.split("."); assert.eq( stages.reduce( (res, stage) => { @@ -49,7 +49,7 @@ function testPipeline(pipeline, expectedResult, optimizedAwayStages) { return res; }, []), - expectedResult[path], + expectedValue, "Stage: " + stageName + ", path: " + path + ", explain: " + tojson(explainOutput)); } @@ -60,35 +60,46 @@ function testPipeline(pipeline, expectedResult, optimizedAwayStages) { // Case where overflow of limit + skip prevents limit stage from being absorbed. Values // are specified as integrals > MAX_LONG. Note that we cannot specify this huge value as // a NumberLong, as we get a number conversion error (even if it's passed as a string). -testPipeline([{$sort: {x: -1}}, {$skip: 18446744073709552000}, {$limit: 6}], - {"$limit": [NumberLong(6)], "$skip": [NumberLong("9223372036854775807")]}); -testPipeline([{$sort: {x: -1}}, {$skip: 6}, {$limit: 18446744073709552000}], - {"$limit": [NumberLong("9223372036854775807")], "$skip": [NumberLong(6)]}); +testPipeline([{$sort: {x: -1}}, {$skip: 18446744073709552000}, {$limit: 6}], { + $limit: {path: "$limit", expectedValue: [NumberLong(6)]}, + $skip: {path: "$skip", expectedValue: [NumberLong("9223372036854775807")]} +}); +testPipeline([{$sort: {x: -1}}, {$skip: 6}, {$limit: 18446744073709552000}], { + $limit: {path: "$limit", expectedValue: [NumberLong("9223372036854775807")]}, + $skip: {path: "$skip", expectedValue: [NumberLong(6)]} +}); // Case where overflow of limit + skip prevents limit stage from being absorbed. One of the // values == MAX_LONG, another one is 1. -testPipeline([{$sort: {x: -1}}, {$skip: NumberLong("9223372036854775807")}, {$limit: 1}], - {"$limit": [NumberLong(1)], "$skip": [NumberLong("9223372036854775807")]}); -testPipeline([{$sort: {x: -1}}, {$skip: 1}, {$limit: NumberLong("9223372036854775807")}], - {"$limit": [NumberLong("9223372036854775807")], "$skip": [NumberLong(1)]}); +testPipeline([{$sort: {x: -1}}, {$skip: NumberLong("9223372036854775807")}, {$limit: 1}], { + $limit: {path: "$limit", expectedValue: [NumberLong(1)]}, + $skip: {path: "$skip", expectedValue: [NumberLong("9223372036854775807")]} +}); +testPipeline([{$sort: {x: -1}}, {$skip: 1}, {$limit: NumberLong("9223372036854775807")}], { + $limit: {path: "$limit", expectedValue: [NumberLong("9223372036854775807")]}, + $skip: {path: "$skip", expectedValue: [NumberLong(1)]} +}); // Case where limit + skip do not overflow. Limit == MAX_LONG and skip is 0. Should be able to // absorb the limit and skip stages. // Note that we cannot specify limit == 0, so we expect an error in this case. testPipeline([{$sort: {x: -1}}, {$skip: 0}, {$limit: NumberLong("9223372036854775807")}], - {"$sort.limit": [NumberLong("9223372036854775807")]}, + {SORT: {path: "limitAmount", expectedValue: [NumberLong("9223372036854775807")]}}, ["$skip", "$limit"]); // Case where limit + skip do not overflow. One value is MAX_LONG - 1 and another one is 1. // Should be able to absorb the limit stage. testPipeline([{$sort: {x: -1}}, {$skip: NumberLong("9223372036854775806")}, {$limit: 1}], { - "$sort.limit": [NumberLong("9223372036854775807")], - "$skip": [NumberLong("9223372036854775806")] + SORT: {path: "limitAmount", expectedValue: [NumberLong("9223372036854775807")]}, + $skip: {path: "$skip", expectedValue: [NumberLong("9223372036854775806")]} }, ["$limit"]); testPipeline([{$sort: {x: -1}}, {$skip: 1}, {$limit: NumberLong("9223372036854775806")}], - {"$sort.limit": [NumberLong("9223372036854775807")], "$skip": [NumberLong(1)]}, + { + SORT: {path: "limitAmount", expectedValue: [NumberLong("9223372036854775807")]}, + $skip: {path: "$skip", expectedValue: [NumberLong(1)]} + }, ["$limit"]); // Case where the first $limit can be pushed down, but the second overflows and thus remains in @@ -101,10 +112,13 @@ testPipeline( {$skip: 10}, {$limit: 1} ], - {"$sort.limit": [NumberLong("9223372036854775807")], "$limit": [NumberLong(1)]}); + { + SORT: {path: "limitAmount", expectedValue: [NumberLong("9223372036854775807")]}, + $limit: {path: "$limit", expectedValue: [NumberLong(1)]} + }); -// Case with multiple $limit and $skip stages where the second $limit ends up being the smallest. -// There is no overflow in this case. +// Case with multiple $limit and $skip stages where the second $limit ends up being the +// smallest. There is no overflow in this case. testPipeline( [ {$sort: {x: -1}}, @@ -114,37 +128,51 @@ testPipeline( {$limit: 1} ], { - "$sort.limit": [NumberLong("9223372036854775804")], - "$skip": [NumberLong("9223372036854775803")] + SORT: {path: "limitAmount", expectedValue: [NumberLong("9223372036854775804")]}, + $skip: {path: "$skip", expectedValue: [NumberLong("9223372036854775803")]} }); // Case where limit + skip do not overflow. Both values are < MAX_LONG. testPipeline([{$sort: {x: -1}}, {$skip: 674761616283}, {$limit: 35361718}], - {"$sort.limit": [NumberLong(674796978001)], "$skip": [NumberLong(674761616283)]}, + { + SORT: {path: "limitAmount", expectedValue: [NumberLong(674796978001)]}, + $skip: {path: "$skip", expectedValue: [NumberLong(674761616283)]} + }, ["$limit"]); testPipeline([{$sort: {x: -1}}, {$skip: 35361718}, {$limit: 674761616283}], - {"$sort.limit": [NumberLong(674796978001)], "$skip": [NumberLong(35361718)]}, + { + SORT: {path: "limitAmount", expectedValue: [NumberLong(674796978001)]}, + $skip: {path: "$skip", expectedValue: [NumberLong(35361718)]} + }, ["$limit"]); // Case where where overflow of limit + skip + skip prevents limit stage from being absorbed. // One skip == MAX_LONG - 1, another one is 1. Should merge two skip stages into one. testPipeline( [{$sort: {x: -1}}, {$skip: 1}, {$skip: NumberLong("9223372036854775806")}, {$limit: 1}], - {"$limit": [NumberLong(1)], "$skip": [NumberLong("9223372036854775807")]}); + { + $limit: {path: "$limit", expectedValue: [NumberLong(1)]}, + $skip: {path: "$skip", expectedValue: [NumberLong("9223372036854775807")]} + }, + ["$sort"]); // Case where where overflow of limit + skip + skip prevents limit stage from being absorbed. // One skip == MAX_LONG, another one is 1. Should not absorb or merge any stages. testPipeline( [{$sort: {x: -1}}, {$skip: 1}, {$skip: NumberLong("9223372036854775807")}, {$limit: 1}], - {"$limit": [NumberLong(1)], "$skip": [NumberLong(1), NumberLong("9223372036854775807")]}); + { + $limit: {path: "$limit", expectedValue: [NumberLong(1)]}, + $skip: {path: "$skip", expectedValue: [NumberLong(1), NumberLong("9223372036854775807")]} + }, + ["$sort"]); // Case where sample size is > MAX_LONG. testPipeline([{$sample: {size: 18446744073709552000}}], - {"$sample.size": [NumberLong("9223372036854775807")]}); + {$sample: {path: "$sample.size", expectedValue: [NumberLong("9223372036854775807")]}}); // Case where sample size is == MAX_LONG. testPipeline([{$sample: {size: NumberLong("9223372036854775807")}}], - {"$sample.size": [NumberLong("9223372036854775807")]}); + {$sample: {path: "$sample.size", expectedValue: [NumberLong("9223372036854775807")]}}); // Case where sample size is == MAX_LONG - 1. testPipeline([{$sample: {size: NumberLong("9223372036854775806")}}], - {"$sample.size": [NumberLong("9223372036854775806")]}); + {$sample: {path: "$sample.size", expectedValue: [NumberLong("9223372036854775806")]}}); })(); diff --git a/jstests/aggregation/optimize_away_pipeline.js b/jstests/aggregation/optimize_away_pipeline.js index f4c2c0ac483..d6a27573333 100644 --- a/jstests/aggregation/optimize_away_pipeline.js +++ b/jstests/aggregation/optimize_away_pipeline.js @@ -251,11 +251,20 @@ assertPipelineUsesAggregation({ expectedStages: ["COLLSCAN"], expectedResult: [{_id: "null", s: 50}] }); + // TODO SERVER-40253: We cannot optimize away text search queries. assert.commandWorked(coll.createIndex({y: "text"})); assertPipelineUsesAggregation( {pipeline: [{$match: {$text: {$search: "abc"}}}], expectedStages: ["IXSCAN"]}); +// Test that $match, $sort, and $project all get answered by the PlanStage layer for a $text query. +assertPipelineUsesAggregation({ + pipeline: + [{$match: {$text: {$search: "abc"}}}, {$sort: {sortField: 1}}, {$project: {a: 1, b: 1}}], + expectedStages: ["TEXT", "SORT", "PROJECTION_SIMPLE"], + optimizedAwayStages: ["$match", "$sort", "$project"] +}); assert.commandWorked(coll.dropIndexes()); + // We cannot optimize away geo near queries. assert.commandWorked(coll.createIndex({"y": "2d"})); assertPipelineUsesAggregation({ @@ -429,6 +438,47 @@ assert.eq(30, skipStage.$skip, explain); assert.commandWorked(coll.dropIndexes()); +// $sort can be optimized away even if there is no index to provide the sort. +assertPipelineDoesNotUseAggregation({ + pipeline: [ + {$sort: {x: -1}}, + ], + expectedStages: ["COLLSCAN", "SORT"], + expectedResult: [{_id: 3, x: 30}, {_id: 2, x: 20}, {_id: 1, x: 10}], +}); + +// $match, $sort, $limit can be optimized away even if there is no index to provide the sort. +assertPipelineDoesNotUseAggregation({ + pipeline: [{$match: {x: {$gte: 0}}}, {$sort: {x: -1}}, {$limit: 1}], + expectedStages: ["COLLSCAN", "SORT"], + expectedResult: [{_id: 3, x: 30}], +}); + +// If there is a $project that can't result in a covered plan, however, then the pipeline cannot be +// optimized away. But the $sort should still get pushed down into the PlanStage layer. +assertPipelineUsesAggregation({ + pipeline: + [{$match: {x: {$gte: 20}}}, {$sort: {x: -1}}, {$project: {_id: 0, x: 1}}, {$limit: 2}], + expectedStages: ["COLLSCAN", "SORT"], + optimizedAwayStages: ["$match", "$sort", "$limit"], + expectedResult: [{x: 30}, {x: 20}], +}); + +// Test a case where there is a projection that can be covered by an index, but a blocking sort is +// still required. In this case, the entire pipeline can be optimized away. +assert.commandWorked(coll.createIndex({y: 1, x: 1})); +assertPipelineDoesNotUseAggregation({ + pipeline: [ + {$match: {y: {$gt: 0}, x: {$gte: 20}}}, + {$sort: {x: -1}}, + {$project: {_id: 0, y: 1, x: 1}}, + {$limit: 2} + ], + expectedStages: ["IXSCAN", "SORT", "PROJECTION_COVERED"], + expectedResult: [], +}); +assert.commandWorked(coll.dropIndexes()); + // getMore cases. // Test getMore on a collection with an optimized away pipeline. diff --git a/jstests/aggregation/sources/sort/explain_sort.js b/jstests/aggregation/sources/sort/explain_sort.js index f6d22e9e719..eee6526df11 100644 --- a/jstests/aggregation/sources/sort/explain_sort.js +++ b/jstests/aggregation/sources/sort/explain_sort.js @@ -17,20 +17,24 @@ function checkResults(results, verbosity, expectedNumResults = kNumDocs) { let cursorSubdocs = getAggPlanStages(results, "$cursor"); let nReturned = 0; let nExamined = 0; - assert.gt(cursorSubdocs.length, 0); for (let stageResult of cursorSubdocs) { const result = stageResult.$cursor; if (verbosity === "queryPlanner") { - assert(!result.hasOwnProperty("executionStats"), tojson(results)); + assert(!result.hasOwnProperty("executionStats"), results); } else if (cursorSubdocs.length === 1) { // If there was a single shard, then we can assert that 'nReturned' and // 'totalDocsExamined' are as expected. If there are multiple shards, these assertions // might not hold, since each shard enforces the limit on its own and then the merging // node enforces the limit again to obtain the final result set. - assert.eq(result.executionStats.nReturned, expectedNumResults, tojson(results)); - assert.eq(result.executionStats.totalDocsExamined, expectedNumResults, tojson(results)); + assert.eq(result.executionStats.nReturned, expectedNumResults, results); + assert.eq(result.executionStats.totalDocsExamined, expectedNumResults, results); } } + + // If there was no $cursor stage, then assert that the pipeline was optimized away. + if (cursorSubdocs.length === 0) { + assert(isQueryPlan(results), results); + } } for (let i = 0; i < kNumDocs; i++) { diff --git a/jstests/aggregation/use_query_projection.js b/jstests/aggregation/use_query_projection.js index ad83f06acb1..248ad19bbf6 100644 --- a/jstests/aggregation/use_query_projection.js +++ b/jstests/aggregation/use_query_projection.js @@ -23,7 +23,7 @@ function assertQueryCoversProjection({pipeline = [], pipelineOptimizedAway = tru const explainOutput = coll.explain().aggregate(pipeline); if (pipelineOptimizedAway) { - assert(isQueryPlan(explainOutput)); + assert(isQueryPlan(explainOutput), explainOutput); assert( !planHasStage(db, explainOutput, "FETCH"), "Expected pipeline " + tojsononeline(pipeline) + @@ -32,7 +32,7 @@ function assertQueryCoversProjection({pipeline = [], pipelineOptimizedAway = tru "Expected pipeline " + tojsononeline(pipeline) + " to include an index scan in the explain output: " + tojson(explainOutput)); } else { - assert(isAggregationPlan(explainOutput)); + assert(isAggregationPlan(explainOutput), explainOutput); assert( !aggPlanHasStage(explainOutput, "FETCH"), "Expected pipeline " + tojsononeline(pipeline) + @@ -102,7 +102,6 @@ assertQueryCoversProjection({ {$sort: {x: 1, a: 1}}, // Note: not indexable, but doesn't add any additional dependencies. {$project: {_id: 1, x: 1, a: 1}}, ], - pipelineOptimizedAway: false }); // Test that a multikey index will prevent a covered plan. diff --git a/jstests/aggregation/use_query_sort.js b/jstests/aggregation/use_query_sort.js index 8dbbc0c41ec..ec14625e856 100644 --- a/jstests/aggregation/use_query_sort.js +++ b/jstests/aggregation/use_query_sort.js @@ -18,63 +18,69 @@ for (let i = 0; i < 100; ++i) { } assert.commandWorked(bulk.execute()); -function assertHasNonBlockingQuerySort(pipeline) { +function assertHasNonBlockingQuerySort(pipeline, expectRejectedPlans) { const explainOutput = coll.explain().aggregate(pipeline); - assert(isQueryPlan(explainOutput)); - assert(!planHasStage(db, explainOutput, "SORT"), - "Expected pipeline " + tojsononeline(pipeline) + - " *not* to include a SORT stage in the explain output: " + tojson(explainOutput)); - assert(planHasStage(db, explainOutput, "IXSCAN"), - "Expected pipeline " + tojsononeline(pipeline) + - " to include an index scan in the explain output: " + tojson(explainOutput)); - assert(!hasRejectedPlans(explainOutput), - "Expected pipeline " + tojsononeline(pipeline) + - " not to have any rejected plans in the explain output: " + tojson(explainOutput)); + assert(isQueryPlan(explainOutput), explainOutput); + assert(!planHasStage(db, explainOutput, "SORT"), explainOutput); + assert(planHasStage(db, explainOutput, "IXSCAN"), explainOutput); + assert.eq(expectRejectedPlans, hasRejectedPlans(explainOutput), explainOutput); return explainOutput; } -function assertDoesNotHaveQuerySort(pipeline) { +function assertHasBlockingQuerySort(pipeline, expectRejectedPlans) { const explainOutput = coll.explain().aggregate(pipeline); - assert(isAggregationPlan(explainOutput)); - assert(aggPlanHasStage(explainOutput, "$sort"), - "Expected pipeline " + tojsononeline(pipeline) + - " to include a $sort stage in the explain output: " + tojson(explainOutput)); - assert(!aggPlanHasStage(explainOutput, "SORT"), - "Expected pipeline " + tojsononeline(pipeline) + - " *not* to include a SORT stage in the explain output: " + tojson(explainOutput)); - assert(!hasRejectedPlans(explainOutput), - "Expected pipeline " + tojsononeline(pipeline) + - " not to have any rejected plans in the explain output: " + tojson(explainOutput)); + assert(isQueryPlan(explainOutput), explainOutput); + assert(planHasStage(db, explainOutput, "SORT"), explainOutput); + assert.eq(expectRejectedPlans, hasRejectedPlans(explainOutput), explainOutput); +} + +function assertDoesNotHaveQuerySort(pipeline, expectRejectedPlans) { + const explainOutput = coll.explain().aggregate(pipeline); + assert(isAggregationPlan(explainOutput), explainOutput); + assert(aggPlanHasStage(explainOutput, "$sort"), explainOutput); + assert(!aggPlanHasStage(explainOutput, "SORT"), explainOutput); + assert.eq(expectRejectedPlans, hasRejectedPlans(explainOutput), explainOutput); return explainOutput; } -// Test that a sort on the _id can use the query system to provide the sort. -assertHasNonBlockingQuerySort([{$sort: {_id: -1}}]); -assertHasNonBlockingQuerySort([{$sort: {_id: 1}}]); -assertHasNonBlockingQuerySort([{$match: {_id: {$gte: 50}}}, {$sort: {_id: 1}}]); -assertHasNonBlockingQuerySort([{$match: {_id: {$gte: 50}}}, {$sort: {_id: -1}}]); +// Test that a sort on _id can use the query system to provide the sort. Since the sort and match +// are both on the _id field, we don't expect there to be any rejected plans. +assertHasNonBlockingQuerySort([{$sort: {_id: -1}}], false); +assertHasNonBlockingQuerySort([{$sort: {_id: 1}}], false); +assertHasNonBlockingQuerySort([{$match: {_id: {$gte: 50}}}, {$sort: {_id: 1}}], false); +assertHasNonBlockingQuerySort([{$match: {_id: {$gte: 50}}}, {$sort: {_id: -1}}], false); -// Test that a sort on a field not in any index cannot use a query system sort, and thus still -// has a $sort stage. -assertDoesNotHaveQuerySort([{$sort: {x: -1}}]); -assertDoesNotHaveQuerySort([{$sort: {x: 1}}]); -assertDoesNotHaveQuerySort([{$match: {_id: {$gte: 50}}}, {$sort: {x: 1}}]); +// Test that a sort on a field not in any index will use a SORT stage in the query layer. Since +// there is no index to support the sort, we don't expect any rejected plans. +assertHasBlockingQuerySort([{$sort: {x: -1}}], false); +assertHasBlockingQuerySort([{$sort: {x: 1}}], false); +assertHasBlockingQuerySort([{$match: {_id: {$gte: 50}}}, {$sort: {x: 1}}], false); assert.commandWorked(coll.createIndex({x: 1, y: -1})); -assertHasNonBlockingQuerySort([{$sort: {x: 1, y: -1}}]); -assertHasNonBlockingQuerySort([{$sort: {x: 1}}]); -assertDoesNotHaveQuerySort([{$sort: {y: 1}}]); -assertDoesNotHaveQuerySort([{$sort: {x: 1, y: 1}}]); +// Since there is an index to support these sorts, we expect the system to choose a non-blocking +// sort. The only indexed plan is an index-provided sort, so we don't expect any rejected plans. +assertHasNonBlockingQuerySort([{$sort: {x: 1, y: -1}}], false); +assertHasNonBlockingQuerySort([{$sort: {x: 1}}], false); -// Test that a $match on a field not present in the same index eligible to provide a sort can -// still result in a index scan on the sort field (SERVER-7568). -assertHasNonBlockingQuerySort([{$match: {_id: {$gte: 50}}}, {$sort: {x: 1}}]); +// These sorts cannot be provided by an index, but it still should get pushed down to the query +// layer. The only plan is a COLLSCAN followed by a blocking sort, so we don't expect any rejected +// plans. +assertHasBlockingQuerySort([{$sort: {y: 1}}], false); +assertHasBlockingQuerySort([{$sort: {x: 1, y: 1}}], false); -// Test that a sort on the text score does not use the query system to provide the sort, since -// it would need to be a blocking sort, and we prefer the $sort stage to the query system's sort -// implementation. +// In this case, there are two possible plans: an _id index scan with a blocking SORT, or an +// index-provided sort by scanning the {x: 1, y: -1} index. Since the _id predicate is more +// selective, we expect the blocking SORT plan to win and there to be a rejected plan. +assertHasBlockingQuerySort([{$match: {_id: {$gte: 90}}}, {$sort: {x: 1}}], true); +// A query of the same shape will use a non-blocking plan if the predicate is not selective. +assertHasNonBlockingQuerySort([{$match: {_id: {$gte: 0}}}, {$sort: {x: 1}}], true); + +// Meta-sort on "textScore" currently cannot be pushed down into the query layer. See SERVER-43816. assert.commandWorked(coll.createIndex({x: "text"})); assertDoesNotHaveQuerySort( - [{$match: {$text: {$search: "test"}}}, {$sort: {key: {$meta: "textScore"}}}]); + [{$match: {$text: {$search: "test"}}}, {$sort: {key: {$meta: "textScore"}}}], false); + +// Meta-sort on "randVal" cannot be pushed into the query layer. See SERVER-43816. +assertDoesNotHaveQuerySort([{$sort: {key: {$meta: "randVal"}}}], false); }()); diff --git a/jstests/core/views/views_aggregation.js b/jstests/core/views/views_aggregation.js index db833937dda..b15c0f0094b 100644 --- a/jstests/core/views/views_aggregation.js +++ b/jstests/core/views/views_aggregation.js @@ -143,37 +143,45 @@ assert.commandWorked( // Test explain modes on a view. let explainPlan = assert.commandWorked( viewsDB.popSortedView.explain("queryPlanner").aggregate([{$limit: 1}, {$match: {pop: 3}}])); -assert.eq(explainPlan.stages[0].$cursor.queryPlanner.namespace, "views_aggregation.coll"); -assert(!explainPlan.stages[0].$cursor.hasOwnProperty("executionStats")); +assert.eq( + explainPlan.stages[0].$cursor.queryPlanner.namespace, "views_aggregation.coll", explainPlan); +assert(!explainPlan.stages[0].$cursor.hasOwnProperty("executionStats"), explainPlan); explainPlan = assert.commandWorked( viewsDB.popSortedView.explain("executionStats").aggregate([{$limit: 1}, {$match: {pop: 3}}])); -assert.eq(explainPlan.stages[0].$cursor.queryPlanner.namespace, "views_aggregation.coll"); -assert(explainPlan.stages[0].$cursor.hasOwnProperty("executionStats")); -assert.eq(explainPlan.stages[0].$cursor.executionStats.nReturned, 5); -assert(!explainPlan.stages[0].$cursor.executionStats.hasOwnProperty("allPlansExecution")); +assert.eq( + explainPlan.stages[0].$cursor.queryPlanner.namespace, "views_aggregation.coll", explainPlan); +assert(explainPlan.stages[0].$cursor.hasOwnProperty("executionStats"), explainPlan); +assert.eq(explainPlan.stages[0].$cursor.executionStats.nReturned, 1, explainPlan); +assert(!explainPlan.stages[0].$cursor.executionStats.hasOwnProperty("allPlansExecution"), + explainPlan); explainPlan = assert.commandWorked(viewsDB.popSortedView.explain("allPlansExecution") .aggregate([{$limit: 1}, {$match: {pop: 3}}])); -assert.eq(explainPlan.stages[0].$cursor.queryPlanner.namespace, "views_aggregation.coll"); -assert(explainPlan.stages[0].$cursor.hasOwnProperty("executionStats")); -assert.eq(explainPlan.stages[0].$cursor.executionStats.nReturned, 5); -assert(explainPlan.stages[0].$cursor.executionStats.hasOwnProperty("allPlansExecution")); +assert.eq( + explainPlan.stages[0].$cursor.queryPlanner.namespace, "views_aggregation.coll", explainPlan); +assert(explainPlan.stages[0].$cursor.hasOwnProperty("executionStats"), explainPlan); +assert.eq(explainPlan.stages[0].$cursor.executionStats.nReturned, 1, explainPlan); +assert(explainPlan.stages[0].$cursor.executionStats.hasOwnProperty("allPlansExecution"), + explainPlan); // Passing a value of true for the explain option to the aggregation command, without using the // shell explain helper, should continue to work. explainPlan = assert.commandWorked( viewsDB.popSortedView.aggregate([{$limit: 1}, {$match: {pop: 3}}], {explain: true})); -assert.eq(explainPlan.stages[0].$cursor.queryPlanner.namespace, "views_aggregation.coll"); -assert(!explainPlan.stages[0].$cursor.hasOwnProperty("executionStats")); +assert.eq( + explainPlan.stages[0].$cursor.queryPlanner.namespace, "views_aggregation.coll", explainPlan); +assert(!explainPlan.stages[0].$cursor.hasOwnProperty("executionStats"), explainPlan); // Test allPlansExecution explain mode on the base collection. explainPlan = assert.commandWorked( viewsDB.coll.explain("allPlansExecution").aggregate([{$limit: 1}, {$match: {pop: 3}}])); -assert.eq(explainPlan.stages[0].$cursor.queryPlanner.namespace, "views_aggregation.coll"); -assert(explainPlan.stages[0].$cursor.hasOwnProperty("executionStats")); -assert.eq(explainPlan.stages[0].$cursor.executionStats.nReturned, 1); -assert(explainPlan.stages[0].$cursor.executionStats.hasOwnProperty("allPlansExecution")); +assert.eq( + explainPlan.stages[0].$cursor.queryPlanner.namespace, "views_aggregation.coll", explainPlan); +assert(explainPlan.stages[0].$cursor.hasOwnProperty("executionStats"), explainPlan); +assert.eq(explainPlan.stages[0].$cursor.executionStats.nReturned, 1, explainPlan); +assert(explainPlan.stages[0].$cursor.executionStats.hasOwnProperty("allPlansExecution"), + explainPlan); // The explain:true option should not work when paired with the explain shell helper. assert.throws(function() { diff --git a/jstests/libs/analyze_plan.js b/jstests/libs/analyze_plan.js index 2c0c64088d1..74063ee5cd3 100644 --- a/jstests/libs/analyze_plan.js +++ b/jstests/libs/analyze_plan.js @@ -98,10 +98,13 @@ function hasRejectedPlans(root) { } if (root.hasOwnProperty("shards")) { - // This is a sharded agg explain. - const cursorStages = getAggPlanStages(root, "$cursor"); - return cursorStages.find((cursorStage) => cursorStageHasRejectedPlans(cursorStage)) !== - undefined; + // This is a sharded agg explain. Recursively check whether any of the shards has rejected + // plans. + const shardExplains = []; + for (const shard in root.shards) { + shardExplains.push(root.shards[shard]); + } + return shardExplains.some(hasRejectedPlans); } else if (root.hasOwnProperty("stages")) { // This is an agg explain. const cursorStages = getAggPlanStages(root, "$cursor"); diff --git a/jstests/noPassthrough/aggregation_cursor_invalidations.js b/jstests/noPassthrough/aggregation_cursor_invalidations.js index 07155ff7135..a610e8e5441 100644 --- a/jstests/noPassthrough/aggregation_cursor_invalidations.js +++ b/jstests/noPassthrough/aggregation_cursor_invalidations.js @@ -99,10 +99,13 @@ assertNoOpenCursorsOnSourceCollection(); // Test that dropping the source collection between an aggregate and a getMore will *not* cause // an aggregation pipeline to fail during the getMore if it *does not need* to fetch more // results from the collection. +// +// The test expects that the $sort will execute in the agg layer, and will not be pushed down into +// the PlanStage layer. We add an $_internalInhibitOptimization stage to enforce this. setup(); res = assert.commandWorked(testDB.runCommand({ aggregate: sourceCollection.getName(), - pipeline: [{$sort: {x: 1}}], + pipeline: [{$_internalInhibitOptimization: {}}, {$sort: {x: 1}}], cursor: { batchSize: batchSize, }, diff --git a/jstests/noPassthrough/pipeline_optimization_failpoint.js b/jstests/noPassthrough/pipeline_optimization_failpoint.js index 6181da559ad..543bc9d6a39 100644 --- a/jstests/noPassthrough/pipeline_optimization_failpoint.js +++ b/jstests/noPassthrough/pipeline_optimization_failpoint.js @@ -22,13 +22,25 @@ for (let i = 0; i < 25; ++i) { assert.commandWorked(coll.insert({_id: i, city: "Cleveland", pop: pop, state: "OH"})); } -const pipeline = [{$match: {state: "OH"}}, {$sort: {pop: -1}}, {$limit: 10}]; +const pipeline = [ + {$match: {state: "OH"}}, + // The test-only '$_internalInhibitOptimization' operator prevents the $sort and $limit from + // being pushed down into the PlanStage layer, thereby ensuring that these two stages remain + // inside the pipeline layer. We need to make sure that the pipeline does get optimized away, + // since the "disablePipelineOptimization" failpoint does nothing in the + // "optimizedPipeline:true" case. + {$_internalInhibitOptimization: {}}, + {$sort: {pop: -1}}, + {$limit: 10} +]; const enabledPlan = coll.explain().aggregate(pipeline); // Test that sort and the limit were combined. assert.eq(aggPlanHasStage(enabledPlan, "$limit"), false); +assert.eq(aggPlanHasStage(enabledPlan, "$cursor"), true); +assert.eq(aggPlanHasStage(enabledPlan, "$_internalInhibitOptimization"), true); assert.eq(aggPlanHasStage(enabledPlan, "$sort"), true); -assert.eq(enabledPlan.stages.length, 2); +assert.eq(enabledPlan.stages.length, 3); const enabledResult = coll.aggregate(pipeline).toArray(); @@ -38,9 +50,11 @@ assert.commandWorked( const disabledPlan = coll.explain().aggregate(pipeline); // Test that the $limit still exists and hasn't been optimized away. -assert.eq(aggPlanHasStage(disabledPlan, "$limit"), true); +assert.eq(aggPlanHasStage(enabledPlan, "$cursor"), true); +assert.eq(aggPlanHasStage(enabledPlan, "$_internalInhibitOptimization"), true); assert.eq(aggPlanHasStage(disabledPlan, "$sort"), true); -assert.eq(disabledPlan.stages.length, 3); +assert.eq(aggPlanHasStage(disabledPlan, "$limit"), true); +assert.eq(disabledPlan.stages.length, 4); const disabledResult = coll.aggregate(pipeline).toArray(); diff --git a/src/mongo/db/exec/sort.cpp b/src/mongo/db/exec/sort.cpp index eb5486b61ee..a7c4c045234 100644 --- a/src/mongo/db/exec/sort.cpp +++ b/src/mongo/db/exec/sort.cpp @@ -111,7 +111,7 @@ std::unique_ptr<PlanStageStats> SortStage::getStats() { _commonStats.isEOF = isEOF(); std::unique_ptr<PlanStageStats> ret = std::make_unique<PlanStageStats>(_commonStats, STAGE_SORT); - ret->specific = _sortExecutor.stats(); + ret->specific = _sortExecutor.cloneStats(); ret->children.emplace_back(child()->getStats()); return ret; } diff --git a/src/mongo/db/exec/sort.h b/src/mongo/db/exec/sort.h index f437ce0b719..b727771c8bb 100644 --- a/src/mongo/db/exec/sort.h +++ b/src/mongo/db/exec/sort.h @@ -72,12 +72,8 @@ public: std::unique_ptr<PlanStageStats> getStats(); - /** - * Returns nullptr. Stats related to sort execution must be extracted with 'getStats()', since - * they are retrieved on demand from the underlying sort execution machinery. - */ const SpecificStats* getSpecificStats() const final { - return nullptr; + return &_sortExecutor.stats(); } private: @@ -86,8 +82,6 @@ private: SortExecutor _sortExecutor; - SortStats _specificStats; - // Whether or not we have finished loading data into '_sortExecutor'. bool _populated = false; }; diff --git a/src/mongo/db/exec/sort_executor.cpp b/src/mongo/db/exec/sort_executor.cpp index 51e0255a01d..bf2d02a465d 100644 --- a/src/mongo/db/exec/sort_executor.cpp +++ b/src/mongo/db/exec/sort_executor.cpp @@ -55,10 +55,13 @@ SortExecutor::SortExecutor(SortPattern sortPattern, std::string tempDir, bool allowDiskUse) : _sortPattern(std::move(sortPattern)), - _limit(limit), - _maxMemoryUsageBytes(maxMemoryUsageBytes), _tempDir(std::move(tempDir)), - _diskUseAllowed(allowDiskUse) {} + _diskUseAllowed(allowDiskUse) { + _stats.sortPattern = + _sortPattern.serialize(SortPattern::SortKeySerialization::kForExplain).toBson(); + _stats.limit = limit; + _stats.maxMemoryUsageBytes = maxMemoryUsageBytes; +} boost::optional<Document> SortExecutor::getNextDoc() { auto wsm = getNextWsm(); @@ -114,7 +117,7 @@ void SortExecutor::add(Value sortKey, WorkingSetMember data) { } _sorter->add(std::move(sortKey), std::move(data)); - _totalDataSizeBytes += data.getMemUsage(); + _stats.totalDataSizeBytes += data.getMemUsage(); } void SortExecutor::loadingDone() { @@ -123,17 +126,17 @@ void SortExecutor::loadingDone() { _sorter.reset(DocumentSorter::make(makeSortOptions(), Comparator(_sortPattern))); } _output.reset(_sorter->done()); - _wasDiskUsed = _wasDiskUsed || _sorter->usedDisk(); + _stats.wasDiskUsed = _stats.wasDiskUsed || _sorter->usedDisk(); _sorter.reset(); } SortOptions SortExecutor::makeSortOptions() const { SortOptions opts; - if (_limit) { - opts.limit = _limit; + if (_stats.limit) { + opts.limit = _stats.limit; } - opts.maxMemoryUsageBytes = _maxMemoryUsageBytes; + opts.maxMemoryUsageBytes = _stats.maxMemoryUsageBytes; if (_diskUseAllowed) { opts.extSortAllowed = true; opts.tempDir = _tempDir; @@ -142,15 +145,8 @@ SortOptions SortExecutor::makeSortOptions() const { return opts; } -std::unique_ptr<SortStats> SortExecutor::stats() const { - auto stats = std::make_unique<SortStats>(); - stats->sortPattern = - _sortPattern.serialize(SortPattern::SortKeySerialization::kForExplain).toBson(); - stats->limit = _limit; - stats->maxMemoryUsageBytes = _maxMemoryUsageBytes; - stats->totalDataSizeBytes = _totalDataSizeBytes; - stats->wasDiskUsed = _wasDiskUsed; - return stats; +std::unique_ptr<SortStats> SortExecutor::cloneStats() const { + return std::unique_ptr<SortStats>{static_cast<SortStats*>(_stats.clone())}; } } // namespace mongo diff --git a/src/mongo/db/exec/sort_executor.h b/src/mongo/db/exec/sort_executor.h index c0945fe1fd4..085311e19da 100644 --- a/src/mongo/db/exec/sort_executor.h +++ b/src/mongo/db/exec/sort_executor.h @@ -68,20 +68,20 @@ public: * the smallest limit. */ void setLimit(uint64_t limit) { - if (!_limit || limit < _limit) - _limit = limit; + if (!_stats.limit || limit < _stats.limit) + _stats.limit = limit; } uint64_t getLimit() const { - return _limit; + return _stats.limit; } bool hasLimit() const { - return _limit > 0; + return _stats.limit > 0; } bool wasDiskUsed() const { - return _wasDiskUsed; + return _stats.wasDiskUsed; } /** @@ -107,7 +107,11 @@ public: return _isEOF; } - std::unique_ptr<SortStats> stats() const; + const SortStats& stats() const { + return _stats; + } + + std::unique_ptr<SortStats> cloneStats() const; private: using DocumentSorter = Sorter<Value, WorkingSetMember>; @@ -124,18 +128,15 @@ private: SortOptions makeSortOptions() const; - SortPattern _sortPattern; - // A limit of zero is defined as no limit. - uint64_t _limit; - uint64_t _maxMemoryUsageBytes; - std::string _tempDir; - bool _diskUseAllowed = false; + const SortPattern _sortPattern; + const std::string _tempDir; + const bool _diskUseAllowed; std::unique_ptr<DocumentSorter> _sorter; std::unique_ptr<DocumentSorter::Iterator> _output; + SortStats _stats; + bool _isEOF = false; - bool _wasDiskUsed = false; - uint64_t _totalDataSizeBytes = 0u; }; } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_cursor.cpp b/src/mongo/db/pipeline/document_source_cursor.cpp index 803f78422b9..6ad22cce49d 100644 --- a/src/mongo/db/pipeline/document_source_cursor.cpp +++ b/src/mongo/db/pipeline/document_source_cursor.cpp @@ -183,10 +183,6 @@ Value DocumentSourceCursor::serialize(boost::optional<ExplainOptions::Verbosity> verbosity == pExpCtx->explain); MutableDocument out; - out["query"] = Value(_query); - - if (!_sort.isEmpty()) - out["sort"] = Value(_sort); BSONObjBuilder explainStatsBuilder; diff --git a/src/mongo/db/pipeline/document_source_cursor.h b/src/mongo/db/pipeline/document_source_cursor.h index eeb03d1ea55..3db747e66db 100644 --- a/src/mongo/db/pipeline/document_source_cursor.h +++ b/src/mongo/db/pipeline/document_source_cursor.h @@ -83,36 +83,6 @@ public: const boost::intrusive_ptr<ExpressionContext>& pExpCtx, bool trackOplogTimestamp = false); - /* - Record the query that was specified for the cursor this wraps, if - any. - - This should be captured after any optimizations are applied to - the pipeline so that it reflects what is really used. - - This gets used for explain output. - - @param pBsonObj the query to record - */ - void setQuery(const BSONObj& query) { - _query = query; - } - - /* - Record the sort that was specified for the cursor this wraps, if - any. - - This should be captured after any optimizations are applied to - the pipeline so that it reflects what is really used. - - This gets used for explain output. - - @param pBsonObj the sort to record - */ - void setSort(const BSONObj& sort) { - _sort = sort; - } - /** * If subsequent sources need no information from the cursor, the cursor can simply output empty * documents, avoiding the overhead of converting BSONObjs to Documents. @@ -133,6 +103,10 @@ public: return _planSummaryStats; } + bool usedDisk() final { + return _planSummaryStats.usedDisk; + } + protected: DocumentSourceCursor(Collection* collection, std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> exec, @@ -183,9 +157,6 @@ private: // Batches results returned from the underlying PlanExecutor. std::deque<Document> _currentBatch; - // BSONObj members must outlive _projection and cursor. - BSONObj _query; - BSONObj _sort; bool _shouldProduceEmptyDocs = false; // The underlying query plan which feeds this pipeline. Must be destroyed while holding the diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp index f8d88637824..2722cc7df7f 100644 --- a/src/mongo/db/pipeline/document_source_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_lookup.cpp @@ -265,10 +265,7 @@ DocumentSource::GetNextResult DocumentSourceLookUp::doGetNext() { objsize <= maxBytes); results.emplace_back(std::move(*result)); } - for (auto&& source : pipeline->getSources()) { - if (source->usedDisk()) - _usedDisk = true; - } + _usedDisk = _usedDisk || pipeline->usedDisk(); MutableDocument output(std::move(inputDoc)); output.setNestedField(_as, Value(std::move(results))); diff --git a/src/mongo/db/pipeline/document_source_single_document_transformation.h b/src/mongo/db/pipeline/document_source_single_document_transformation.h index 6d5daa7171a..8ef435bd776 100644 --- a/src/mongo/db/pipeline/document_source_single_document_transformation.h +++ b/src/mongo/db/pipeline/document_source_single_document_transformation.h @@ -85,10 +85,6 @@ public: return *_parsedTransform; } - bool isSubsetOfProjection(const BSONObj& proj) const { - return _parsedTransform->isSubsetOfProjection(proj); - } - protected: GetNextResult doGetNext() final; void doDispose() final; diff --git a/src/mongo/db/pipeline/parsed_aggregation_projection_node.h b/src/mongo/db/pipeline/parsed_aggregation_projection_node.h index 92ffff5529d..3e77720858a 100644 --- a/src/mongo/db/pipeline/parsed_aggregation_projection_node.h +++ b/src/mongo/db/pipeline/parsed_aggregation_projection_node.h @@ -126,6 +126,11 @@ public: void serialize(boost::optional<ExplainOptions::Verbosity> explain, MutableDocument* output) const; + /** + * Returns true if this node or any child of this node contains a computed field. + */ + bool subtreeContainsComputedFields() const; + protected: // Returns a unique_ptr to a new instance of the implementing class for the given 'fieldName'. virtual std::unique_ptr<ProjectionNode> makeChild(std::string fieldName) const = 0; @@ -182,9 +187,6 @@ private: // Returns nullptr if no such child exists. ProjectionNode* getChild(const std::string& field) const; - // Returns true if this node or any child of this node contains a computed field. - bool subtreeContainsComputedFields() const; - // Our projection semantics are such that all field additions need to be processed in the order // specified. '_orderToProcessAdditionsAndChildren' tracks that order. // diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp index 23cc83fabe7..b0f990f753e 100644 --- a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp +++ b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp @@ -216,19 +216,29 @@ void ParsedInclusionProjection::parseSubObject(const BSONObj& subObj, } } -bool ParsedInclusionProjection::isSubsetOfProjection(const BSONObj& proj) const { +bool ParsedInclusionProjection::isEquivalentToDependencySet(const BSONObj& deps) const { std::set<std::string> preservedPaths; _root->reportProjectedPaths(&preservedPaths); - for (auto&& includedField : preservedPaths) { - if (!proj.hasField(includedField)) + size_t numDependencies = 0; + for (auto&& dependency : deps) { + if (!dependency.trueValue()) { + // This is not an included field, so move on. + continue; + } + + if (preservedPaths.find(dependency.fieldNameStringData().toString()) == + preservedPaths.end()) { return false; + } + ++numDependencies; + } + + if (numDependencies != preservedPaths.size()) { + return false; } // If the inclusion has any computed fields or renamed fields, then it's not a subset. - std::set<std::string> computedPaths; - StringMap<std::string> renamedPaths; - _root->reportComputedPaths(&computedPaths, &renamedPaths); - return computedPaths.empty() && renamedPaths.empty(); + return !_root->subtreeContainsComputedFields(); } } // namespace parsed_aggregation_projection diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.h b/src/mongo/db/pipeline/parsed_inclusion_projection.h index a3f205b90b1..67977e41b4b 100644 --- a/src/mongo/db/pipeline/parsed_inclusion_projection.h +++ b/src/mongo/db/pipeline/parsed_inclusion_projection.h @@ -175,11 +175,11 @@ public: Document applyProjection(const Document& inputDoc) const final; /* - * Checks whether the inclusion projection represented by the InclusionNode - * tree is a subset of the object passed in. Projections that have any - * computed or renamed fields are not considered a subset. + * Given 'deps', a BSONObj describing a the dependency set for a pipeline, returns true if this + * is an inclusion projection with no computed paths which includes the exact same set of fields + * as 'deps'. */ - bool isSubsetOfProjection(const BSONObj& proj) const final; + bool isEquivalentToDependencySet(const BSONObj& deps) const; private: /** diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp b/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp index 6412cbdb15e..89e5c180863 100644 --- a/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp +++ b/src/mongo/db/pipeline/parsed_inclusion_projection_test.cpp @@ -832,92 +832,114 @@ TEST(InclusionProjectionExecutionTest, ComputedFieldShouldReplaceNestedArrayForN } // -// Detection of subset projection. +// Detection of equivalency to the dependency set. // -TEST(InclusionProjectionExecutionTest, ShouldDetectSubsetForIdenticalProjection) { +TEST(InclusionProjectionExecutionTest, ShouldDetectEquivalenceForIdenticalProjection) { auto inclusion = makeInclusionProjectionWithDefaultPolicies(); inclusion.parse(BSON("a" << true << "b" << true)); - auto proj = BSON("_id" << false << "a" << true << "b" << true); + auto proj = BSON("_id" << true << "a" << true << "b" << true); - ASSERT_TRUE(inclusion.isSubsetOfProjection(proj)); + ASSERT_TRUE(inclusion.isEquivalentToDependencySet(proj)); } -TEST(InclusionProjectionExecutionTest, ShouldDetectSubsetForSupersetProjection) { +TEST(InclusionProjectionExecutionTest, ShouldNotDetectEquivalenceForSupersetProjection) { auto inclusion = makeInclusionProjectionWithDefaultPolicies(); inclusion.parse(BSON("a" << true << "b" << true)); - auto proj = BSON("_id" << false << "a" << true << "b" << true << "c" << true); + auto proj = BSON("_id" << true << "a" << true << "b" << true << "c" << true); - ASSERT_TRUE(inclusion.isSubsetOfProjection(proj)); + ASSERT_FALSE(inclusion.isEquivalentToDependencySet(proj)); } -TEST(InclusionProjectionExecutionTest, ShouldDetectSubsetForIdenticalNestedProjection) { +TEST(InclusionProjectionExecutionTest, ShouldDetectEquivalenceForIdenticalNestedProjection) { auto inclusion = makeInclusionProjectionWithDefaultPolicies(); inclusion.parse(BSON("a.b" << true)); - auto proj = BSON("_id" << false << "a.b" << true); + auto proj = BSON("_id" << true << "a.b" << true); - ASSERT_TRUE(inclusion.isSubsetOfProjection(proj)); + ASSERT_TRUE(inclusion.isEquivalentToDependencySet(proj)); } -TEST(InclusionProjectionExecutionTest, ShouldDetectSubsetForSupersetProjectionWithNestedFields) { +TEST(InclusionProjectionExecutionTest, + ShouldNotDetectEquivalenceForSupersetProjectionWithNestedFields) { auto inclusion = makeInclusionProjectionWithDefaultPolicies(); inclusion.parse(BSON("a" << true << "c" << BSON("d" << true))); - auto proj = BSON("_id" << false << "a" << true << "b" << true << "c.d" << true); + auto proj = BSON("_id" << true << "a" << true << "b" << true << "c.d" << true); - ASSERT_TRUE(inclusion.isSubsetOfProjection(proj)); + ASSERT_FALSE(inclusion.isEquivalentToDependencySet(proj)); } -TEST(InclusionProjectionExecutionTest, ShouldDetectNonSubsetForProjectionWithMissingFields) { +TEST(InclusionProjectionExecutionTest, ShouldNotDetectEquivalenceForProjectionWithMissingFields) { auto inclusion = makeInclusionProjectionWithDefaultPolicies(); inclusion.parse(BSON("a" << true << "b" << true)); - auto proj = BSON("_id" << false << "a" << true); - ASSERT_FALSE(inclusion.isSubsetOfProjection(proj)); + auto proj = BSON("_id" << true << "a" << true); + ASSERT_FALSE(inclusion.isEquivalentToDependencySet(proj)); - proj = BSON("_id" << false << "a" << true << "c" << true); - ASSERT_FALSE(inclusion.isSubsetOfProjection(proj)); + proj = BSON("_id" << true << "a" << true << "c" << true); + ASSERT_FALSE(inclusion.isEquivalentToDependencySet(proj)); } TEST(InclusionProjectionExecutionTest, - ShouldDetectNonSubsetForSupersetProjectionWithoutComputedFields) { + ShouldNotDetectEquivalenceForSupersetProjectionWithoutComputedFields) { auto inclusion = makeInclusionProjectionWithDefaultPolicies(); inclusion.parse(BSON("a" << true << "b" << true << "c" << BSON("$literal" << 1))); auto proj = BSON("_id" << false << "a" << true << "b" << true); - ASSERT_FALSE(inclusion.isSubsetOfProjection(proj)); + ASSERT_FALSE(inclusion.isEquivalentToDependencySet(proj)); } -TEST(InclusionProjectionExecutionTest, ShouldDetectNonSubsetForProjectionWithMissingNestedFields) { +TEST(InclusionProjectionExecutionTest, + ShouldNotDetectEquivalenceForProjectionWithMissingNestedFields) { auto inclusion = makeInclusionProjectionWithDefaultPolicies(); inclusion.parse(BSON("a.b" << true << "a.c" << true)); auto proj = BSON("_id" << false << "a.b" << true); - ASSERT_FALSE(inclusion.isSubsetOfProjection(proj)); + ASSERT_FALSE(inclusion.isEquivalentToDependencySet(proj)); } -TEST(InclusionProjectionExecutionTest, ShouldDetectNonSubsetForProjectionWithRenamedFields) { +TEST(InclusionProjectionExecutionTest, ShouldNotDetectEquivalenceForProjectionWithRenamedFields) { auto inclusion = makeInclusionProjectionWithDefaultPolicies(); inclusion.parse(BSON("a" << "$b")); auto proj = BSON("_id" << false << "b" << true); - ASSERT_FALSE(inclusion.isSubsetOfProjection(proj)); + ASSERT_FALSE(inclusion.isEquivalentToDependencySet(proj)); } -TEST(InclusionProjectionExecutionTest, ShouldDetectNonSubsetForProjectionWithMissingIdField) { +TEST(InclusionProjectionExecutionTest, ShouldNotDetectEquivalenceForProjectionWithMissingIdField) { auto inclusion = makeInclusionProjectionWithDefaultPolicies(); inclusion.parse(BSON("a" << true)); auto proj = BSON("a" << true); - ASSERT_FALSE(inclusion.isSubsetOfProjection(proj)); + ASSERT_FALSE(inclusion.isEquivalentToDependencySet(proj)); +} + +TEST(InclusionProjectionExecutionTest, + ShouldNotDetectEquivalenceIfDependenciesExplicitlyExcludeId) { + auto inclusion = makeInclusionProjectionWithDefaultPolicies(); + inclusion.parse(BSON("a" << true)); + + auto proj = BSON("_id" << false << "a" << true); + + ASSERT_FALSE(inclusion.isEquivalentToDependencySet(proj)); +} + +TEST(InclusionProjectionExecutionTest, + ShouldDetectEquivalenceIfBothDepsAndProjExplicitlyExcludeId) { + auto inclusion = makeInclusionProjectionWithDefaultPolicies(); + inclusion.parse(BSON("_id" << false << "a" << true)); + + auto proj = BSON("_id" << false << "a" << true); + + ASSERT_TRUE(inclusion.isEquivalentToDependencySet(proj)); } } // namespace diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index a0e6996829e..0ce917467f5 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -66,6 +66,7 @@ #include "mongo/db/pipeline/document_source_sample_from_random_cursor.h" #include "mongo/db/pipeline/document_source_single_document_transformation.h" #include "mongo/db/pipeline/document_source_sort.h" +#include "mongo/db/pipeline/parsed_inclusion_projection.h" #include "mongo/db/pipeline/pipeline.h" #include "mongo/db/query/collation/collator_interface.h" #include "mongo/db/query/get_executor.h" @@ -187,10 +188,9 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> createRandomCursorEx } StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> attemptToGetExecutor( - OperationContext* opCtx, + const intrusive_ptr<ExpressionContext>& expCtx, Collection* collection, const NamespaceString& nss, - const intrusive_ptr<ExpressionContext>& pExpCtx, BSONObj queryObj, BSONObj projectionObj, const QueryMetadataBitSet& metadataRequested, @@ -201,7 +201,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> attemptToGetExe const size_t plannerOpts, const MatchExpressionParser::AllowedFeatureSet& matcherFeatures) { auto qr = std::make_unique<QueryRequest>(nss); - qr->setTailableMode(pExpCtx->tailableMode); + qr->setTailableMode(expCtx->tailableMode); qr->setFilter(queryObj); qr->setProj(projectionObj); qr->setSort(sortObj); @@ -214,12 +214,12 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> attemptToGetExe // The collation on the ExpressionContext has been resolved to either the user-specified // collation or the collection default. This BSON should never be empty even if the resolved // collator is simple. - qr->setCollation(pExpCtx->getCollatorBSON()); + qr->setCollation(expCtx->getCollatorBSON()); - const ExtensionsCallbackReal extensionsCallback(pExpCtx->opCtx, &nss); + const ExtensionsCallbackReal extensionsCallback(expCtx->opCtx, &nss); auto cq = CanonicalQuery::canonicalize( - opCtx, std::move(qr), pExpCtx, extensionsCallback, matcherFeatures); + expCtx->opCtx, std::move(qr), expCtx, extensionsCallback, matcherFeatures); if (!cq.isOK()) { // Return an error instead of uasserting, since there are cases where the combination of @@ -248,7 +248,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> attemptToGetExe // example, if we have a document {a: [1,2]} and group by "a" a DISTINCT_SCAN on an "a" // index would produce one result for '1' and another for '2', which would be incorrect. auto distinctExecutor = - getExecutorDistinct(opCtx, + getExecutorDistinct(expCtx->opCtx, collection, plannerOpts | QueryPlannerParams::STRICT_DISTINCT_ONLY, &parsedDistinct); @@ -264,7 +264,8 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> attemptToGetExe } bool permitYield = true; - return getExecutorFind(opCtx, collection, std::move(cq.getValue()), permitYield, plannerOpts); + return getExecutorFind( + expCtx->opCtx, collection, std::move(cq.getValue()), permitYield, plannerOpts); } /** @@ -361,13 +362,15 @@ PipelineD::buildInnerQueryExecutor(Collection* collection, // TODO SERVER-37453 this should no longer be necessary when we no don't need locks // to destroy a PlanExecutor. auto deps = pipeline->getDependencies(DepsTracker::kNoMetadata); + const bool shouldProduceEmptyDocs = deps.hasNoRequirements(); auto attachExecutorCallback = - [deps](Collection* collection, - std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> exec, - Pipeline* pipeline) { + [shouldProduceEmptyDocs]( + Collection* collection, + std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> exec, + Pipeline* pipeline) { auto cursor = DocumentSourceCursor::create( collection, std::move(exec), pipeline->getContext()); - addCursorSource(pipeline, std::move(cursor), std::move(deps)); + addCursorSource(pipeline, std::move(cursor), shouldProduceEmptyDocs); }; return std::make_pair(std::move(attachExecutorCallback), std::move(exec)); } @@ -485,24 +488,10 @@ PipelineD::buildInnerQueryExecutorGeneric(Collection* collection, } } - // Find the set of fields in the source documents depended on by this pipeline. - DepsTracker deps = pipeline->getDependencies(DocumentSourceMatch::isTextQuery(queryObj) - ? DepsTracker::kOnlyTextScore - : DepsTracker::kNoMetadata); - - BSONObj projForQuery = deps.toProjectionWithoutMetadata(); - boost::intrusive_ptr<DocumentSourceSort> sortStage; boost::intrusive_ptr<DocumentSourceGroup> groupStage; std::tie(sortStage, groupStage) = getSortAndGroupStagesFromPipeline(pipeline->_sources); - BSONObj sortObj; - if (sortStage) { - sortObj = sortStage->getSortKeyPattern() - .serialize(SortPattern::SortKeySerialization::kForPipelineSerialization) - .toBson(); - } - std::unique_ptr<GroupFromFirstDocumentTransformation> rewrittenGroupStage; if (groupStage) { rewrittenGroupStage = groupStage->rewriteGroupAsTransformOnFirstDocument(); @@ -525,21 +514,26 @@ PipelineD::buildInnerQueryExecutorGeneric(Collection* collection, // layer, but that is handled elsewhere. const auto limit = extractLimitForPushdown(pipeline); + auto metadataAvailable = DocumentSourceMatch::isTextQuery(queryObj) + ? DepsTracker::kOnlyTextScore + : DepsTracker::kNoMetadata; + // Create the PlanExecutor. - auto exec = uassertStatusOK(prepareExecutor(expCtx->opCtx, + BSONObj projForQuery; + bool shouldProduceEmptyDocs = false; + auto exec = uassertStatusOK(prepareExecutor(expCtx, collection, nss, pipeline, - expCtx, sortStage, std::move(rewrittenGroupStage), - deps, + metadataAvailable, queryObj, limit, aggRequest, Pipeline::kAllowedMatcherFeatures, - &sortObj, - &projForQuery)); + &projForQuery, + &shouldProduceEmptyDocs)); if (!projForQuery.isEmpty() && !sources.empty()) { @@ -547,8 +541,14 @@ PipelineD::buildInnerQueryExecutorGeneric(Collection* collection, // projection generated by the dependency optimization. auto proj = dynamic_cast<DocumentSourceSingleDocumentTransformation*>(sources.front().get()); - if (proj && proj->isSubsetOfProjection(projForQuery)) { - sources.pop_front(); + if (proj && + proj->getType() == TransformerInterface::TransformerType::kInclusionProjection) { + auto&& inclusionProj = + static_cast<const parsed_aggregation_projection::ParsedInclusionProjection&>( + proj->getTransformer()); + if (inclusionProj.isEquivalentToDependencySet(projForQuery)) { + sources.pop_front(); + } } } @@ -556,13 +556,13 @@ PipelineD::buildInnerQueryExecutorGeneric(Collection* collection, const bool trackOplogTS = (pipeline->peekFront() && pipeline->peekFront()->constraints().isChangeStreamStage()); - auto attachExecutorCallback = [deps, queryObj, sortObj, projForQuery, trackOplogTS]( + auto attachExecutorCallback = [shouldProduceEmptyDocs, trackOplogTS]( Collection* collection, std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> exec, Pipeline* pipeline) { auto cursor = DocumentSourceCursor::create( collection, std::move(exec), pipeline->getContext(), trackOplogTS); - addCursorSource(pipeline, std::move(cursor), std::move(deps), queryObj, sortObj); + addCursorSource(pipeline, std::move(cursor), shouldProduceEmptyDocs); }; return std::make_pair(std::move(attachExecutorCallback), std::move(exec)); } @@ -582,8 +582,6 @@ PipelineD::buildInnerQueryExecutorGeoNear(Collection* collection, const auto geoNearStage = dynamic_cast<DocumentSourceGeoNear*>(sources.front().get()); invariant(geoNearStage); - auto deps = pipeline->getDependencies(DepsTracker::kAllGeoNearData); - // If the user specified a "key" field, use that field to satisfy the "near" query. Otherwise, // look for a geo-indexed field in 'collection' that can. auto nearFieldName = @@ -594,28 +592,24 @@ PipelineD::buildInnerQueryExecutorGeoNear(Collection* collection, // Create a PlanExecutor whose query is the "near" predicate on 'nearFieldName' combined with // the optional "query" argument in the $geoNear stage. BSONObj fullQuery = geoNearStage->asNearQuery(nearFieldName); - BSONObj proj = deps.toProjectionWithoutMetadata(); - BSONObj sortFromQuerySystem; - auto exec = uassertStatusOK(prepareExecutor(expCtx->opCtx, + + BSONObj proj; + bool shouldProduceEmptyDocs = false; + auto exec = uassertStatusOK(prepareExecutor(expCtx, collection, nss, pipeline, - expCtx, nullptr, /* sortStage */ nullptr, /* rewrittenGroupStage */ - deps, + DepsTracker::kAllGeoNearData, std::move(fullQuery), boost::none, /* limit */ aggRequest, Pipeline::kGeoNearMatcherFeatures, - &sortFromQuerySystem, - &proj)); + &proj, + &shouldProduceEmptyDocs)); - invariant(sortFromQuerySystem.isEmpty(), - str::stream() << "Unexpectedly got the following sort from the query system: " - << sortFromQuerySystem.jsonString()); - - auto attachExecutorCallback = [deps, + auto attachExecutorCallback = [shouldProduceEmptyDocs, distanceField = geoNearStage->getDistanceField(), locationField = geoNearStage->getLocationField(), distanceMultiplier = @@ -629,7 +623,7 @@ PipelineD::buildInnerQueryExecutorGeoNear(Collection* collection, distanceField, locationField, distanceMultiplier); - addCursorSource(pipeline, std::move(cursor), std::move(deps)); + addCursorSource(pipeline, std::move(cursor), shouldProduceEmptyDocs); }; // Remove the initial $geoNear; it will be replaced by $geoNearCursor. sources.pop_front(); @@ -637,45 +631,23 @@ PipelineD::buildInnerQueryExecutorGeoNear(Collection* collection, } StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prepareExecutor( - OperationContext* opCtx, + const intrusive_ptr<ExpressionContext>& expCtx, Collection* collection, const NamespaceString& nss, Pipeline* pipeline, - const intrusive_ptr<ExpressionContext>& expCtx, const boost::intrusive_ptr<DocumentSourceSort>& sortStage, std::unique_ptr<GroupFromFirstDocumentTransformation> rewrittenGroupStage, - const DepsTracker& deps, + QueryMetadataBitSet metadataAvailable, const BSONObj& queryObj, boost::optional<long long> limit, const AggregationRequest* aggRequest, const MatchExpressionParser::AllowedFeatureSet& matcherFeatures, - BSONObj* sortObj, - BSONObj* projectionObj) { - // The query system has the potential to use an index to provide a non-blocking sort and/or to - // use the projection to generate a covered plan. If this is possible, it is more efficient to - // let the query system handle those parts of the pipeline. If not, it is more efficient to use - // a $sort and/or a $project. Thus, we will determine whether the query system can - // provide a non-blocking sort or a covered projection before we commit to a PlanExecutor. - // - // To determine if the query system can provide a non-blocking sort, we pass the - // NO_BLOCKING_SORT planning option, meaning 'getExecutor' will not produce a PlanExecutor if - // the query system would use a blocking sort stage. - // - // To determine if the query system can provide a covered projection, we pass the - // NO_UNCOVERED_PROJECTS planning option, meaning 'getExecutor' will not produce a PlanExecutor - // if the query system would need to fetch the document to do the projection. The following - // logic uses the above strategies, with multiple calls to 'attemptToGetExecutor' to determine - // the most efficient way to handle the $sort and $project stages. - // - // LATER - We should attempt to determine if the results from the query are returned in some - // order so we can then apply other optimizations there are tickets for, such as SERVER-4507. - size_t plannerOpts = QueryPlannerParams::DEFAULT | QueryPlannerParams::NO_BLOCKING_SORT; + BSONObj* projectionObj, + bool* hasNoRequirements) { + invariant(projectionObj); + invariant(hasNoRequirements); - if (deps.hasNoRequirements()) { - // If we don't need any fields from the input document, performing a count is faster, and - // will output empty documents, which is okay. - plannerOpts |= QueryPlannerParams::IS_COUNT; - } + size_t plannerOpts = QueryPlannerParams::DEFAULT; if (pipeline->peekFront() && pipeline->peekFront()->constraints().isChangeStreamStage()) { invariant(expCtx->tailableMode == TailableModeEnum::kTailableAndAwaitData); @@ -687,6 +659,43 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep expCtx->use42ChangeStreamSortKeys = true; } + // If there is a sort stage eligible for pushdown, serialize its SortPattern to a BSONObj. The + // BSONObj format is currently necessary to request that the sort is computed by the query layer + // inside the inner PlanExecutor. We also remove the $sort stage from the Pipeline, since it + // will be handled instead by PlanStage execution. + BSONObj sortObj; + if (sortStage && canSortBePushedDown(sortStage->getSortKeyPattern())) { + sortObj = sortStage->getSortKeyPattern() + .serialize(SortPattern::SortKeySerialization::kForPipelineSerialization) + .toBson(); + + // If the $sort has a coalesced $limit, then we push it down as well. Since the $limit was + // after a $sort in the pipeline, it should not have been provided by the caller. + invariant(!limit); + limit = sortStage->getLimit(); + + pipeline->popFrontWithName(DocumentSourceSort::kStageName); + } + + // Perform dependency analysis. In order to minimize the dependency set, we only analyze the + // stages that remain in the pipeline after pushdown. In particular, any dependencies for a + // $match or $sort pushed down into the query layer will not be reflected here. + auto deps = pipeline->getDependencies(metadataAvailable); + *hasNoRequirements = deps.hasNoRequirements(); + *projectionObj = deps.toProjectionWithoutMetadata(); + + // If we're pushing down a sort, and a merge will be required later, then we need the query + // system to produce sortKey metadata. + if (!sortObj.isEmpty() && expCtx->needsMerge) { + deps.setNeedsMetadata(DocumentMetadataFields::kSortKey, true); + } + + if (deps.hasNoRequirements()) { + // This query might be eligible for count optimizations, since the remaining stages in the + // pipeline don't actually need to read any data produced by the query execution layer. + plannerOpts |= QueryPlannerParams::IS_COUNT; + } + if (rewrittenGroupStage) { BSONObj emptySort; @@ -695,14 +704,13 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep // attemptToGetExecutor() calls below) causes getExecutorDistinct() to ignore some otherwise // valid DISTINCT_SCAN plans, so we pass the projection and exclude the // NO_UNCOVERED_PROJECTIONS planner parameter. - auto swExecutorGrouped = attemptToGetExecutor(opCtx, + auto swExecutorGrouped = attemptToGetExecutor(expCtx, collection, nss, - expCtx, queryObj, *projectionObj, deps.metadataDeps(), - sortObj ? *sortObj : emptySort, + sortObj, boost::none, /* limit */ rewrittenGroupStage->groupId(), aggRequest, @@ -736,10 +744,28 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep } } - const BSONObj emptyProjection; - const BSONObj metaSortProjection = BSON("$sortKey" << BSON("$meta" - << "sortKey")); - + // Unlike stages such as $match and limit which always get pushed down into the inner + // PlanExecutor when present at the front of the pipeline, 'projectionObj' may not always be + // pushed down. (Note that 'projectionObj' is generated based on the dependency set, and + // therefore is not always identical to a $project stage in the pipeline.) The query system has + // the potential to use an index produce a covered plan, computing the projection based on index + // keys rather than documents fetched from the collection. If this is possible, it is more + // efficient to let the query system handle the projection, since covered plans typically have a + // large performance advantage. If not, it is more currently more efficient to compute the + // projection in the agg layer. + // + // To determine if the query system can provide a covered projection, we pass the + // NO_UNCOVERED_PROJECTIONS planning option, meaning 'getExecutor' will not produce a + // PlanExecutor if the query system would need to fetch the document to do the projection. If + // planning fails due to the NO_COVERED_PROJECTIONS option, then we invoke the planner a second + // time without passing 'projectionObj', resulting in a plan where the agg layer handles the + // projection. + // + // The only way to get meta information (e.g. the text score) is to let the query system handle + // the projection. In all other cases, unless the query system can do an index-covered + // projection and avoid going to the raw record at all, it is faster to have the agg system + // perform the projection. + // // TODO SERVER-42905: It should be possible to push down all eligible projections to the query // layer. This code assumes that metadata is passed from the query layer to the DocumentSource // layer via a projection, which is no longer true. @@ -747,100 +773,14 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep plannerOpts |= QueryPlannerParams::NO_UNCOVERED_PROJECTIONS; } - SortPattern userSortPattern(*sortObj, expCtx); - if (sortStage && canSortBePushedDown(userSortPattern)) { - QueryMetadataBitSet needsSortKey; - needsSortKey.set(DocumentMetadataFields::MetaType::kSortKey); - - // If the $sort has a coalesced $limit, then we push it down as well. Since the $limit was - // after a $sort in the pipeline, it should not have been provided by the caller. - invariant(!limit); - auto limitFollowingSort = sortStage->getLimit(); - - // See if the query system can provide a non-blocking sort. - auto swExecutorSort = - attemptToGetExecutor(opCtx, - collection, - nss, - expCtx, - queryObj, - BSONObj(), // empty projection - expCtx->needsMerge ? needsSortKey : DepsTracker::kNoMetadata, - *sortObj, - limitFollowingSort, - boost::none, /* groupIdForDistinctScan */ - aggRequest, - plannerOpts, - matcherFeatures); - - if (swExecutorSort.isOK()) { - // Success! Now see if the query system can also cover the projection. - auto swExecutorSortAndProj = - attemptToGetExecutor(opCtx, - collection, - nss, - expCtx, - queryObj, - *projectionObj, - deps.metadataDeps(), - *sortObj, - limitFollowingSort, - boost::none, /* groupIdForDistinctScan */ - aggRequest, - plannerOpts, - matcherFeatures); - - std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> exec; - if (swExecutorSortAndProj.isOK()) { - // Success! We have a non-blocking sort and a covered projection. - exec = std::move(swExecutorSortAndProj.getValue()); - } else if (swExecutorSortAndProj != ErrorCodes::NoQueryExecutionPlans) { - - return swExecutorSortAndProj.getStatus().withContext( - "Failed to determine whether query system can provide a " - "covered projection in addition to a non-blocking sort"); - } else { - // The query system couldn't cover the projection. - *projectionObj = BSONObj(); - exec = std::move(swExecutorSort.getValue()); - } - - // We know the sort (and any $limit which coalesced with the $sort) is being handled by - // the query system, so remove the $sort stage. - pipeline->_sources.pop_front(); - - return std::move(exec); - } else if (swExecutorSort != ErrorCodes::NoQueryExecutionPlans) { - return swExecutorSort.getStatus().withContext( - "Failed to determine whether query system can provide a non-blocking sort"); - } - } - - // Either there was no $sort stage, or the query system could not provide a non-blocking - // sort. - *sortObj = BSONObj(); - - // Since the DocumentSource layer will perform the sort, remove any dependencies we have on the - // query layer for a sort key. - QueryMetadataBitSet metadataDepsWithoutSortKey = deps.metadataDeps(); - metadataDepsWithoutSortKey[DocumentMetadataFields::kSortKey] = false; - if (!metadataDepsWithoutSortKey.any()) { - // A sort key requirement would have prevented us from being able to add this parameter - // before, but now we know the query system won't cover the sort, so we will be able to - // compute the sort key ourselves during the $sort stage, and thus don't need a query - // projection to do so. - plannerOpts |= QueryPlannerParams::NO_UNCOVERED_PROJECTIONS; - } - - // See if the query system can cover the projection. - auto swExecutorProj = attemptToGetExecutor(opCtx, + // See if the query layer can use the projection to produce a covered plan. + auto swExecutorProj = attemptToGetExecutor(expCtx, collection, nss, - expCtx, queryObj, *projectionObj, - metadataDepsWithoutSortKey, - *sortObj, + deps.metadataDeps(), + sortObj, limit, boost::none, /* groupIdForDistinctScan */ aggRequest, @@ -854,18 +794,18 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep "Failed to determine whether query system can provide a covered projection"); } - // The query system couldn't provide a covered or simple uncovered projection. Do no projections - // and request no metadata from the query layer. + // The query system couldn't generate a covered plan for the projection. Make another attempt + // without the projection. We need not request any metadata: if there are metadata dependencies, + // then we always push the projection down to the query layer (which is implemented by + // refraining from setting the 'NO_UNCOVERED_PROJECTIONS' parameter). *projectionObj = BSONObj(); - // If this doesn't work, nothing will. - return attemptToGetExecutor(opCtx, + return attemptToGetExecutor(expCtx, collection, nss, - expCtx, queryObj, *projectionObj, DepsTracker::kNoMetadata, - *sortObj, + sortObj, limit, boost::none, /* groupIdForDistinctScan */ aggRequest, @@ -875,16 +815,12 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep void PipelineD::addCursorSource(Pipeline* pipeline, boost::intrusive_ptr<DocumentSourceCursor> cursor, - DepsTracker deps, - const BSONObj& queryObj, - const BSONObj& sortObj) { + bool shouldProduceEmptyDocs) { // Add the cursor to the pipeline first so that it's correctly disposed of as part of the // pipeline if an exception is thrown during this method. pipeline->addInitialSource(cursor); - cursor->setQuery(queryObj); - cursor->setSort(sortObj); - if (deps.hasNoRequirements()) { + if (shouldProduceEmptyDocs) { cursor->shouldProduceEmptyDocs(); } } diff --git a/src/mongo/db/pipeline/pipeline_d.h b/src/mongo/db/pipeline/pipeline_d.h index 836b84301b5..20fe571423c 100644 --- a/src/mongo/db/pipeline/pipeline_d.h +++ b/src/mongo/db/pipeline/pipeline_d.h @@ -170,40 +170,42 @@ private: * an index to provide a more efficient sort or projection, the sort and/or projection will be * incorporated into the PlanExecutor. * - * 'sortObj' will be set to an empty object if the query system cannot provide a non-blocking - * sort, and 'projectionObj' will be set to an empty object if the query system cannot provide a - * covered projection. - * * Set 'rewrittenGroupStage' when the pipeline uses $match+$sort+$group stages that are * compatible with a DISTINCT_SCAN plan that visits the first document in each group * (SERVER-9507). + * + * This function computes the dependencies of 'pipeline' and attempts to push the dependency set + * down to the query layer as a projection. If the dependency set indeed results in a projection + * being pushed down to the query layer, this projection is returned in 'projectionObj'. If no + * such projection can be pushed down, then 'projectionObj' is set to the empty BSONObj. This + * can happen if the query system cannot provide a covered projection. + * + * Sets the 'hasNoRequirements' out-parameter based on whether the dependency set is both finite + * and empty. In this case, the query has count semantics. */ static StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> prepareExecutor( - OperationContext* opCtx, + const boost::intrusive_ptr<ExpressionContext>& expCtx, Collection* collection, const NamespaceString& nss, Pipeline* pipeline, - const boost::intrusive_ptr<ExpressionContext>& expCtx, const boost::intrusive_ptr<DocumentSourceSort>& sortStage, std::unique_ptr<GroupFromFirstDocumentTransformation> rewrittenGroupStage, - const DepsTracker& deps, + QueryMetadataBitSet metadataAvailable, const BSONObj& queryObj, boost::optional<long long> limit, const AggregationRequest* aggRequest, const MatchExpressionParser::AllowedFeatureSet& matcherFeatures, - BSONObj* sortObj, - BSONObj* projectionObj); + BSONObj* projectionObj, + bool* hasNoRequirements); /** - * Adds 'cursor' to the front of 'pipeline', using 'deps' to inform the cursor of its - * dependencies. If specified, 'queryObj', 'sortObj' and 'projectionObj' are passed to the - * cursor for explain reporting. + * Adds 'cursor' to the front of 'pipeline'. If 'shouldProduceEmptyDocs' is true, then we inform + * 'cursor' that this is a count scenario -- the dependency set is fully known and is empty. In + * this case, 'cursor' can return a sequence of empty documents for the caller to count. */ static void addCursorSource(Pipeline* pipeline, boost::intrusive_ptr<DocumentSourceCursor> cursor, - DepsTracker deps, - const BSONObj& queryObj = BSONObj(), - const BSONObj& sortObj = BSONObj()); + bool shouldProduceEmptyDocs); }; } // namespace mongo diff --git a/src/mongo/db/pipeline/transformer_interface.h b/src/mongo/db/pipeline/transformer_interface.h index d82970702b3..d726b0c91ff 100644 --- a/src/mongo/db/pipeline/transformer_interface.h +++ b/src/mongo/db/pipeline/transformer_interface.h @@ -67,19 +67,5 @@ public: */ virtual Document serializeTransformation( boost::optional<ExplainOptions::Verbosity> explain) const = 0; - - /** - * Returns true if this transformer is an inclusion projection and is a subset of - * 'proj', which must be a valid projection specification. For example, if this - * TransformerInterface represents the inclusion projection - * - * {a: 1, b: 1, c: 1} - * - * then it is a subset of the projection {a: 1, c: 1}, and this function returns - * true. - */ - virtual bool isSubsetOfProjection(const BSONObj& proj) const { - return false; - } }; } // namespace mongo diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp index 665d20d549a..3466e16240c 100644 --- a/src/mongo/db/query/explain.cpp +++ b/src/mongo/db/query/explain.cpp @@ -31,7 +31,6 @@ #include "mongo/db/query/explain.h" -#include "mongo/base/owned_pointer_vector.h" #include "mongo/bson/util/builder.h" #include "mongo/db/exec/cached_plan.h" #include "mongo/db/exec/collection_scan.h" @@ -42,6 +41,7 @@ #include "mongo/db/exec/multi_plan.h" #include "mongo/db/exec/near.h" #include "mongo/db/exec/pipeline_proxy.h" +#include "mongo/db/exec/sort.h" #include "mongo/db/exec/text.h" #include "mongo/db/exec/working_set_common.h" #include "mongo/db/keypattern.h" @@ -983,6 +983,10 @@ void Explain::getSummaryStats(const PlanExecutor& exec, PlanSummaryStats* statsO if (STAGE_SORT == stages[i]->stageType()) { statsOut->hasSortStage = true; + + auto sortStage = static_cast<const SortStage*>(stages[i]); + auto sortStats = static_cast<const SortStats*>(sortStage->getSpecificStats()); + statsOut->usedDisk = sortStats->wasDiskUsed; } if (STAGE_IXSCAN == stages[i]->stageType()) { diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index 9be58ae42b2..f610b940db4 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -1021,6 +1021,14 @@ bool turnIxscanIntoCount(QuerySolution* soln) { return false; } + // Since count scans return no data, they are always forward scans. Index scans, on the other + // hand, may need to scan the index in reverse order in order to obtain a sort. If the index + // scan direction is backwards, then we need to swap the start and end of the count scan bounds. + if (isn->direction < 0) { + startKey.swap(endKey); + std::swap(startKeyInclusive, endKeyInclusive); + } + // Make the count node that we replace the fetch + ixscan with. CountScanNode* csn = new CountScanNode(isn->index); csn->startKey = startKey; diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp index 27405653ae7..5a0d1c8a18c 100644 --- a/src/mongo/db/query/planner_analysis.cpp +++ b/src/mongo/db/query/planner_analysis.cpp @@ -619,12 +619,6 @@ QuerySolutionNode* QueryPlannerAnalysis::analyzeSort(const CanonicalQuery& query // If we're here, we need to add a sort stage. - // If we're not allowed to put a blocking sort in, bail out. - if (params.options & QueryPlannerParams::NO_BLOCKING_SORT) { - delete solnRoot; - return nullptr; - } - if (!solnRoot->fetched()) { const bool sortIsCovered = std::all_of(sortObj.begin(), sortObj.end(), [solnRoot](BSONElement e) { diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 56bbe55dfe4..46d90750b5c 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -108,9 +108,6 @@ string optionString(size_t options) { case QueryPlannerParams::INCLUDE_SHARD_FILTER: ss << "INCLUDE_SHARD_FILTER "; break; - case QueryPlannerParams::NO_BLOCKING_SORT: - ss << "NO_BLOCKING_SORT "; - break; case QueryPlannerParams::INDEX_INTERSECTION: ss << "INDEX_INTERSECTION "; break; @@ -854,7 +851,7 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( if (0 == out.size() && relevantIndices.front().type != IndexType::INDEX_WILDCARD) { // Push hinted index solution to output list if found. It is possible to end up without // a solution in the case where a filtering QueryPlannerParams argument, such as - // NO_BLOCKING_SORT, leads to its exclusion. + // NO_UNCOVERED_PROJECTIONS, leads to its exclusion. auto soln = buildWholeIXSoln(relevantIndices.front(), query, params); if (soln) { LOG(5) << "Planner: outputting soln that uses hinted index as scan."; diff --git a/src/mongo/db/query/query_planner_index_test.cpp b/src/mongo/db/query/query_planner_index_test.cpp index e56222e9657..ea8b176f302 100644 --- a/src/mongo/db/query/query_planner_index_test.cpp +++ b/src/mongo/db/query/query_planner_index_test.cpp @@ -585,20 +585,6 @@ TEST_F(QueryPlannerTest, CompoundMultikeyBoundsNoIntersect) { // QueryPlannerParams option tests // -TEST_F(QueryPlannerTest, NoBlockingSortsAllowedTest) { - params.options = QueryPlannerParams::NO_BLOCKING_SORT; - runQuerySortProj(BSONObj(), BSON("x" << 1), BSONObj()); - assertNumSolutions(0U); - - addIndex(BSON("x" << 1)); - - runQuerySortProj(BSONObj(), BSON("x" << 1), BSONObj()); - assertNumSolutions(1U); - assertSolutionExists( - "{fetch: {filter: null, node: {ixscan: " - "{filter: null, pattern: {x: 1}}}}}"); -} - TEST_F(QueryPlannerTest, NoTableScanBasic) { params.options = QueryPlannerParams::NO_TABLE_SCAN; runQuery(BSONObj()); diff --git a/src/mongo/db/query/query_planner_options_test.cpp b/src/mongo/db/query/query_planner_options_test.cpp index d2cdd50ea78..ef4070a70d8 100644 --- a/src/mongo/db/query/query_planner_options_test.cpp +++ b/src/mongo/db/query/query_planner_options_test.cpp @@ -351,15 +351,6 @@ TEST_F(QueryPlannerTest, HintInvalid) { runInvalidQueryHint(BSONObj(), fromjson("{b: 1}")); } -TEST_F(QueryPlannerTest, HintedBlockingSortIndexFilteredOut) { - params.options = QueryPlannerParams::NO_BLOCKING_SORT; - addIndex(BSON("a" << 1)); - addIndex(BSON("b" << 1)); - runQueryAsCommand( - fromjson("{find: 'testns', filter: {a: 1, b: 1}, sort: {b: 1}, hint: {a: 1}}")); - assertNumSolutions(0U); -} - TEST_F(QueryPlannerTest, HintedNotCoveredProjectionIndexFilteredOut) { params.options = QueryPlannerParams::NO_UNCOVERED_PROJECTIONS; addIndex(BSON("a" << 1)); diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h index 9229dbb06ce..6597211ad7d 100644 --- a/src/mongo/db/query/query_planner_params.h +++ b/src/mongo/db/query/query_planner_params.h @@ -65,39 +65,36 @@ struct QueryPlannerParams { // See the comment on ShardFilterStage for details. INCLUDE_SHARD_FILTER = 1 << 2, - // Set this if you don't want any plans with a blocking sort stage. All sorts must be - // provided by an index. - NO_BLOCKING_SORT = 1 << 3, - // Set this if you want to turn on index intersection. - INDEX_INTERSECTION = 1 << 4, + INDEX_INTERSECTION = 1 << 3, - // Indicate to the planner that the caller is requesting a count operation, possibly through - // a count command, or as part of an aggregation pipeline. - IS_COUNT = 1 << 5, + // Indicate to the planner that this query could be eligible for count optimization. For + // example, the query {$group: {_id: null, sum: {$sum: 1}}} is a count-like operation and + // could be eligible for the COUNT_SCAN. + IS_COUNT = 1 << 4, // Set this if you want to handle batchSize properly with sort(). If limits on SORT // stages are always actually limits, then this should be left off. If they are // sometimes to be interpreted as batchSize, then this should be turned on. - SPLIT_LIMITED_SORT = 1 << 6, + SPLIT_LIMITED_SORT = 1 << 5, // Set this if you don't want any plans with a non-covered projection stage. All projections // must be provided/covered by an index. - NO_UNCOVERED_PROJECTIONS = 1 << 7, + NO_UNCOVERED_PROJECTIONS = 1 << 6, // Set this to generate covered whole IXSCAN plans. - GENERATE_COVERED_IXSCANS = 1 << 8, + GENERATE_COVERED_IXSCANS = 1 << 7, // Set this to track the most recent timestamp seen by this cursor while scanning the oplog. - TRACK_LATEST_OPLOG_TS = 1 << 9, + TRACK_LATEST_OPLOG_TS = 1 << 8, // Set this so that collection scans on the oplog wait for visibility before reading. - OPLOG_SCAN_WAIT_FOR_VISIBLE = 1 << 10, + OPLOG_SCAN_WAIT_FOR_VISIBLE = 1 << 9, // Set this so that getExecutorDistinct() will only use a plan that _guarantees_ it will // return exactly one document per value of the distinct field. See the comments above the // declaration of getExecutorDistinct() for more detail. - STRICT_DISTINCT_ONLY = 1 << 11, + STRICT_DISTINCT_ONLY = 1 << 10, }; // See Options enum above. |