summaryrefslogtreecommitdiff
path: root/jstests/aggregation
diff options
context:
space:
mode:
authorDavid Storch <david.storch@mongodb.com>2019-10-10 22:16:39 +0000
committerevergreen <evergreen@mongodb.com>2019-10-10 22:16:39 +0000
commitd1a128b434d89f1cba3f1a4a60a117a55291b098 (patch)
tree0e961b0b4dc81e2762226b0a084595fbe8ca95f7 /jstests/aggregation
parente408478d1f6283e279e57fedf63cd08ac2181d04 (diff)
downloadmongo-d1a128b434d89f1cba3f1a4a60a117a55291b098.tar.gz
SERVER-36723 Push $limit beneath DocumentSourceCursor into the PlanStage layer.
In addition to working towards the general goal of doing as much query execution as possible with a PlanStage tree, this should have a positive performance impact for certain agg pipelines. Previously, a pipeline with a $project (or a $project-like stage such as $addFields) followed by a $limit might have applied this limit only after a full batch of data was loaded by DocumentSourceCursor. After this change, the limit will take effect prior to DocumentSourceCursor batching, and thus may reduce the amount of data processed by the query.
Diffstat (limited to 'jstests/aggregation')
-rw-r--r--jstests/aggregation/bugs/skip_limit_overflow.js38
-rw-r--r--jstests/aggregation/explain_limit.js57
-rw-r--r--jstests/aggregation/optimize_away_pipeline.js256
-rw-r--r--jstests/aggregation/sources/sort/explain_sort.js20
4 files changed, 287 insertions, 84 deletions
diff --git a/jstests/aggregation/bugs/skip_limit_overflow.js b/jstests/aggregation/bugs/skip_limit_overflow.js
index 50e665b178f..597518d069b 100644
--- a/jstests/aggregation/bugs/skip_limit_overflow.js
+++ b/jstests/aggregation/bugs/skip_limit_overflow.js
@@ -14,6 +14,7 @@ load("jstests/libs/analyze_plan.js"); // For 'aggPlanHasStages' and other expla
const coll = db.server39788;
coll.drop();
+assert.commandWorked(db.runCommand({create: coll.getName()}));
function testPipeline(pipeline, expectedResult, optimizedAwayStages) {
const explainOutput = coll.explain().aggregate(pipeline);
@@ -75,27 +76,54 @@ testPipeline([{$sort: {x: -1}}, {$skip: 1}, {$limit: NumberLong("922337203685477
// absorb the limit and skip stages.
// Note that we cannot specify limit == 0, so we expect an error in this case.
testPipeline([{$sort: {x: -1}}, {$skip: 0}, {$limit: NumberLong("9223372036854775807")}],
- {"$cursor.limit": [NumberLong("9223372036854775807")]},
+ {"$sort.limit": [NumberLong("9223372036854775807")]},
["$skip", "$limit"]);
// Case where limit + skip do not overflow. One value is MAX_LONG - 1 and another one is 1.
// Should be able to absorb the limit stage.
testPipeline([{$sort: {x: -1}}, {$skip: NumberLong("9223372036854775806")}, {$limit: 1}],
{
- "$cursor.limit": [NumberLong("9223372036854775807")],
+ "$sort.limit": [NumberLong("9223372036854775807")],
"$skip": [NumberLong("9223372036854775806")]
},
["$limit"]);
testPipeline([{$sort: {x: -1}}, {$skip: 1}, {$limit: NumberLong("9223372036854775806")}],
- {"$cursor.limit": [NumberLong("9223372036854775807")], "$skip": [NumberLong(1)]},
+ {"$sort.limit": [NumberLong("9223372036854775807")], "$skip": [NumberLong(1)]},
["$limit"]);
+// Case where the first $limit can be pushed down, but the second overflows and thus remains in
+// place.
+testPipeline(
+ [
+ {$sort: {x: -1}},
+ {$skip: NumberLong("9223372036854775800")},
+ {$limit: 7},
+ {$skip: 10},
+ {$limit: 1}
+ ],
+ {"$sort.limit": [NumberLong("9223372036854775807")], "$limit": [NumberLong(1)]});
+
+// Case with multiple $limit and $skip stages where the second $limit ends up being the smallest.
+// There is no overflow in this case.
+testPipeline(
+ [
+ {$sort: {x: -1}},
+ {$skip: NumberLong("9223372036854775800")},
+ {$limit: 7},
+ {$skip: 3},
+ {$limit: 1}
+ ],
+ {
+ "$sort.limit": [NumberLong("9223372036854775804")],
+ "$skip": [NumberLong("9223372036854775803")]
+ });
+
// Case where limit + skip do not overflow. Both values are < MAX_LONG.
testPipeline([{$sort: {x: -1}}, {$skip: 674761616283}, {$limit: 35361718}],
- {"$cursor.limit": [NumberLong(674796978001)], "$skip": [NumberLong(674761616283)]},
+ {"$sort.limit": [NumberLong(674796978001)], "$skip": [NumberLong(674761616283)]},
["$limit"]);
testPipeline([{$sort: {x: -1}}, {$skip: 35361718}, {$limit: 674761616283}],
- {"$cursor.limit": [NumberLong(674796978001)], "$skip": [NumberLong(35361718)]},
+ {"$sort.limit": [NumberLong(674796978001)], "$skip": [NumberLong(35361718)]},
["$limit"]);
// Case where overflow of limit + skip + skip prevents limit stage from being absorbed.
diff --git a/jstests/aggregation/explain_limit.js b/jstests/aggregation/explain_limit.js
index 7b725548c96..f2a4ff28aff 100644
--- a/jstests/aggregation/explain_limit.js
+++ b/jstests/aggregation/explain_limit.js
@@ -7,39 +7,35 @@ load("jstests/libs/analyze_plan.js"); // For getAggPlanStages().
let coll = db.explain_limit;
-const kMultipleSolutionLimit = 101;
-const kCollSize = kMultipleSolutionLimit + 5;
+const kCollSize = 105;
const kLimit = 10;
// Return whether or not explain() was successful and contained the appropriate fields given the
-// requested verbosity. Checks that the number of documents examined is correct based on
-// 'multipleSolutions', which indicates there was more than one plan available.
-function checkResults({results, verbosity, multipleSolutions}) {
- let cursorSubdocs = getAggPlanStages(results, "$cursor");
+// requested verbosity. Checks that the number of documents examined and returned are correct given
+// the value of the limit.
+function checkResults({results, verbosity}) {
+ let cursorSubdocs = getAggPlanStages(results, "LIMIT");
assert.gt(cursorSubdocs.length, 0);
for (let stageResult of cursorSubdocs) {
- assert(stageResult.hasOwnProperty("$cursor"));
- let result = stageResult.$cursor;
-
- assert.eq(result.limit, NumberLong(kLimit), tojson(results));
+ assert.eq(stageResult.limitAmount, NumberLong(kLimit), results);
+ if (verbosity !== "queryPlanner") {
+ assert.eq(stageResult.nReturned, NumberLong(kLimit), results);
+ }
+ }
- if (verbosity === "queryPlanner") {
- assert(!result.hasOwnProperty("executionStats"), tojson(results));
+ // Explain should report that we only have to examine as many documents as the limit.
+ if (verbosity !== "queryPlanner") {
+ if (results.hasOwnProperty("executionStats")) {
+ assert.eq(results.executionStats.nReturned, kLimit, results);
+ assert.eq(results.executionStats.totalDocsExamined, kLimit, results);
} else {
- // If it's "executionStats" or "allPlansExecution".
- if (multipleSolutions) {
- // If there's more than one plan available, we may run several of them against
- // each other to see which is fastest. During this, our limit may be ignored
- // and so explain may return that it examined more documents than we asked it
- // to.
- assert.lte(
- result.executionStats.nReturned, kMultipleSolutionLimit, tojson(results));
- assert.lte(result.executionStats.totalDocsExamined,
- kMultipleSolutionLimit,
- tojson(results));
- } else {
- assert.eq(result.executionStats.nReturned, kLimit, tojson(results));
- assert.eq(result.executionStats.totalDocsExamined, kLimit, tojson(results));
+ // This must be output for a sharded explain. Verify that each shard reports the
+ // expected execution stats.
+ assert(results.hasOwnProperty("shards"));
+ for (let elem in results.shards) {
+ const shardExecStats = results.shards[elem].executionStats;
+ assert.eq(shardExecStats.nReturned, kLimit, results);
+ assert.eq(shardExecStats.totalDocsExamined, kLimit, results);
}
}
}
@@ -59,11 +55,10 @@ let plannerLevel = coll.explain("queryPlanner").aggregate(pipeline);
checkResults({results: plannerLevel, verbosity: "queryPlanner"});
let execLevel = coll.explain("executionStats").aggregate(pipeline);
-checkResults({results: execLevel, verbosity: "executionStats", multipleSolutions: false});
+checkResults({results: execLevel, verbosity: "executionStats"});
let allPlansExecLevel = coll.explain("allPlansExecution").aggregate(pipeline);
-checkResults(
- {results: allPlansExecLevel, verbosity: "allPlansExecution", multipleSolutions: false});
+checkResults({results: allPlansExecLevel, verbosity: "allPlansExecution"});
// Create a second index so that more than one plan is available.
assert.commandWorked(coll.createIndex({a: 1, b: 1}));
@@ -72,8 +67,8 @@ plannerLevel = coll.explain("queryPlanner").aggregate(pipeline);
checkResults({results: plannerLevel, verbosity: "queryPlanner"});
execLevel = coll.explain("executionStats").aggregate(pipeline);
-checkResults({results: execLevel, verbosity: "executionStats", multipleSolutions: true});
+checkResults({results: execLevel, verbosity: "executionStats"});
allPlansExecLevel = coll.explain("allPlansExecution").aggregate(pipeline);
-checkResults({results: allPlansExecLevel, verbosity: "allPlansExecution", multipleSolutions: true});
+checkResults({results: allPlansExecLevel, verbosity: "allPlansExecution"});
})();
diff --git a/jstests/aggregation/optimize_away_pipeline.js b/jstests/aggregation/optimize_away_pipeline.js
index 686414b7035..d3634837ef0 100644
--- a/jstests/aggregation/optimize_away_pipeline.js
+++ b/jstests/aggregation/optimize_away_pipeline.js
@@ -3,10 +3,9 @@
// collapsed into a single $cursor source pipeline. The resulting cursor in this case will look
// like what the client would have gotten from find command.
//
-// Relies on the pipeline stages to be collapsed into a single $cursor stage, so pipelines cannot
-// be wrapped into a facet stage to not prevent this optimization.
-// TODO SERVER-40323: Plan analyzer helper functions cannot correctly handle explain output for
-// sharded collections.
+// Relies on the pipeline stages to be collapsed into a single $cursor stage, so pipelines cannot be
+// wrapped into a facet stage to not prevent this optimization. Also, this test is not prepared to
+// handle explain output for sharded collections.
// @tags: [do_not_wrap_aggregations_in_facets, assumes_unsharded_collection]
(function() {
"use strict";
@@ -28,12 +27,17 @@ assert.commandWorked(coll.insert({_id: 3, x: 30}));
// the explain output. If 'expectedResult' is provided, the pipeline is executed and the
// returned result is validated against the expected result without respecting the order of the
// documents. If 'preserveResultOrder' is 'true' - the order is respected.
+//
+// If 'optimizedAwayStages' is non-null, then it should contain a list of agg plan stages that
+// should *not* be present in the pipeline, since their execution was pushed down into the query
+// layer. The test will verify that this pushdown is reflected in explain output.
function assertPipelineUsesAggregation({
pipeline = [],
pipelineOptions = {},
- expectedStage = null,
+ expectedStages = null,
expectedResult = null,
- preserveResultOrder = false
+ preserveResultOrder = false,
+ optimizedAwayStages = null
} = {}) {
const explainOutput = coll.explain().aggregate(pipeline, pipelineOptions);
@@ -45,6 +49,14 @@ function assertPipelineUsesAggregation({
" *not* to use a query layer at the root level in the explain output: " +
tojson(explainOutput));
+ if (optimizedAwayStages) {
+ for (let stage of optimizedAwayStages) {
+ assert(!aggPlanHasStage(explainOutput, stage),
+ "Expected pipeline " + tojsononeline(pipeline) + " to *not* include a " + stage +
+ " stage in the explain output: " + tojson(explainOutput));
+ }
+ }
+
let cursor = getAggPlanStage(explainOutput, "$cursor");
if (cursor) {
cursor = cursor.$cursor;
@@ -58,9 +70,14 @@ function assertPipelineUsesAggregation({
assert(cursor.queryPlanner.optimizedPipeline === undefined,
"Expected pipeline " + tojsononeline(pipeline) + " to *not* include an " +
"'optimizedPipeline' field in the explain output: " + tojson(explainOutput));
- assert(aggPlanHasStage(explainOutput, expectedStage),
- "Expected pipeline " + tojsononeline(pipeline) + " to include a " + expectedStage +
- " stage in the explain output: " + tojson(explainOutput));
+
+ if (expectedStages) {
+ for (let expectedStage of expectedStages) {
+ assert(aggPlanHasStage(explainOutput, expectedStage),
+ "Expected pipeline " + tojsononeline(pipeline) + " to include a " +
+ expectedStage + " stage in the explain output: " + tojson(explainOutput));
+ }
+ }
if (expectedResult) {
const actualResult = coll.aggregate(pipeline, pipelineOptions).toArray();
@@ -80,7 +97,7 @@ function assertPipelineUsesAggregation({
function assertPipelineDoesNotUseAggregation({
pipeline = [],
pipelineOptions = {},
- expectedStage = null,
+ expectedStages = null,
expectedResult = null,
preserveResultOrder = false
} = {}) {
@@ -106,9 +123,14 @@ function assertPipelineDoesNotUseAggregation({
"Expected pipeline " + tojsononeline(pipeline) + " to include an " +
"'optimizedPipeline' field in the explain output: " + tojson(explainOutput));
}
- assert(planHasStage(db, explainOutput, expectedStage),
- "Expected pipeline " + tojsononeline(pipeline) + " to include a " + expectedStage +
- " stage in the explain output: " + tojson(explainOutput));
+
+ if (expectedStages) {
+ for (let expectedStage of expectedStages) {
+ assert(planHasStage(db, explainOutput, expectedStage),
+ "Expected pipeline " + tojsononeline(pipeline) + " to include a " +
+ expectedStage + " stage in the explain output: " + tojson(explainOutput));
+ }
+ }
if (expectedResult) {
const actualResult = coll.aggregate(pipeline, pipelineOptions).toArray();
@@ -135,7 +157,7 @@ let explainOutput;
// single cursor stage.
assertPipelineDoesNotUseAggregation({
pipeline: [],
- expectedStage: "COLLSCAN",
+ expectedStages: ["COLLSCAN"],
expectedResult: [{_id: 1, x: 10}, {_id: 2, x: 20}, {_id: 3, x: 30}]
});
assertPipelineDoesNotUseAggregation(
@@ -148,7 +170,7 @@ assert.commandWorked(coll.insert({_id: 4, x: 40, b: "abc"}));
assertPipelineDoesNotUseAggregation({
pipeline: [{$match: {b: "ABC"}}],
pipelineOptions: {collation: {locale: "en_US", strength: 2}},
- expectedStage: "COLLSCAN",
+ expectedStages: ["COLLSCAN"],
expectedResult: [{_id: 4, x: 40, b: "abc"}]
});
assert.commandWorked(coll.deleteOne({_id: 4}));
@@ -161,20 +183,19 @@ assert.commandWorked(coll.deleteOne({_id: 4}));
assert.commandWorked(coll.createIndex({x: 1}));
assertPipelineDoesNotUseAggregation({
pipeline: [{$sort: {x: 1}}, {$project: {x: 1, _id: 0}}],
- expectedStage: "IXSCAN",
+ expectedStages: ["IXSCAN"],
expectedResult: [{x: 10}, {x: 20}, {x: 30}],
preserveResultOrder: true
});
assertPipelineDoesNotUseAggregation({
pipeline: [{$match: {x: {$gte: 20}}}, {$sort: {x: 1}}, {$project: {x: 1, _id: 0}}],
- expectedStage: "IXSCAN",
+ expectedStages: ["IXSCAN"],
expectedResult: [{x: 20}, {x: 30}],
preserveResultOrder: true
});
-// TODO: SERVER-36723 We cannot collapse if there is a $limit stage though.
-assertPipelineUsesAggregation({
+assertPipelineDoesNotUseAggregation({
pipeline: [{$match: {x: {$gte: 20}}}, {$sort: {x: 1}}, {$limit: 1}, {$project: {x: 1, _id: 0}}],
- expectedStage: "IXSCAN",
+ expectedStages: ["IXSCAN"],
expectedResult: [{x: 20}]
});
assert.commandWorked(coll.dropIndexes());
@@ -185,68 +206,227 @@ assert.commandWorked(coll.dropIndexes());
assert.commandWorked(coll.insert({_id: 4, x: 40, a: {b: "ab1"}}));
assertPipelineUsesAggregation({
pipeline: [{$project: {x: 1, _id: 0}}],
- expectedStage: "COLLSCAN",
+ expectedStages: ["COLLSCAN"],
expectedResult: [{x: 10}, {x: 20}, {x: 30}, {x: 40}]
});
assertPipelineUsesAggregation({
pipeline: [{$match: {x: 20}}, {$project: {x: 1, _id: 0}}],
- expectedStage: "COLLSCAN",
+ expectedStages: ["COLLSCAN"],
expectedResult: [{x: 20}]
});
assertPipelineUsesAggregation({
pipeline: [{$project: {x: 1, "a.b": 1, _id: 0}}],
- expectedStage: "COLLSCAN",
+ expectedStages: ["COLLSCAN"],
expectedResult: [{x: 10}, {x: 20}, {x: 30}, {x: 40, a: {b: "ab1"}}]
});
assertPipelineUsesAggregation({
pipeline: [{$match: {x: 40}}, {$project: {"a.b": 1, _id: 0}}],
- expectedStage: "COLLSCAN",
+ expectedStages: ["COLLSCAN"],
expectedResult: [{a: {b: "ab1"}}]
});
assert.commandWorked(coll.deleteOne({_id: 4}));
-// TODO SERVER-36723: $limit stage is not supported yet.
-assertPipelineUsesAggregation({
- pipeline: [{$match: {x: 20}}, {$limit: 1}],
- expectedStage: "COLLSCAN",
- expectedResult: [{_id: 2, x: 20}]
-});
-// TODO SERVER-36723: $skip stage is not supported yet.
+// TODO SERVER-40909: $skip stage is not supported yet.
assertPipelineUsesAggregation({
pipeline: [{$match: {x: {$gte: 20}}}, {$skip: 1}],
- expectedStage: "COLLSCAN",
+ expectedStages: ["COLLSCAN"],
expectedResult: [{_id: 3, x: 30}]
});
// We cannot collapse a $project stage if it has a complex pipeline expression.
assertPipelineUsesAggregation(
- {pipeline: [{$project: {x: {$substr: ["$y", 0, 1]}, _id: 0}}], expectedStage: "COLLSCAN"});
+ {pipeline: [{$project: {x: {$substr: ["$y", 0, 1]}, _id: 0}}], expectedStages: ["COLLSCAN"]});
assertPipelineUsesAggregation({
pipeline: [{$match: {x: 20}}, {$project: {x: {$substr: ["$y", 0, 1]}, _id: 0}}],
- expectedStage: "COLLSCAN"
+ expectedStages: ["COLLSCAN"]
});
// We cannot optimize away a pipeline if there are stages which have no equivalent in the
// find command.
assertPipelineUsesAggregation({
pipeline: [{$match: {x: {$gte: 20}}}, {$count: "count"}],
- expectedStage: "COLLSCAN",
+ expectedStages: ["COLLSCAN"],
expectedResult: [{count: 2}]
});
assertPipelineUsesAggregation({
pipeline: [{$match: {x: {$gte: 20}}}, {$group: {_id: "null", s: {$sum: "$x"}}}],
- expectedStage: "COLLSCAN",
+ expectedStages: ["COLLSCAN"],
expectedResult: [{_id: "null", s: 50}]
});
// TODO SERVER-40253: We cannot optimize away text search queries.
assert.commandWorked(coll.createIndex({y: "text"}));
assertPipelineUsesAggregation(
- {pipeline: [{$match: {$text: {$search: "abc"}}}], expectedStage: "IXSCAN"});
+ {pipeline: [{$match: {$text: {$search: "abc"}}}], expectedStages: ["IXSCAN"]});
assert.commandWorked(coll.dropIndexes());
// We cannot optimize away geo near queries.
assert.commandWorked(coll.createIndex({"y": "2d"}));
assertPipelineUsesAggregation({
pipeline: [{$geoNear: {near: [0, 0], distanceField: "y", spherical: true}}],
- expectedStage: "GEO_NEAR_2D"
+ expectedStages: ["GEO_NEAR_2D"],
+});
+assert.commandWorked(coll.dropIndexes());
+
+// Test cases around pushdown of $limit.
+assert.commandWorked(coll.createIndex({x: 1}));
+
+// A lone $limit pipeline can be optimized away.
+assertPipelineDoesNotUseAggregation({
+ pipeline: [{$limit: 1}],
+ expectedStages: ["COLLSCAN", "LIMIT"],
+});
+
+// $match followed by $limit can be optimized away.
+assertPipelineDoesNotUseAggregation({
+ pipeline: [{$match: {x: 20}}, {$limit: 1}],
+ expectedStages: ["IXSCAN", "LIMIT"],
+ expectedResult: [{_id: 2, x: 20}],
+});
+
+// $limit followed by $match cannot be fully optimized away. The $limit is pushed down, but the
+// $match is executed in the agg layer.
+assertPipelineUsesAggregation({
+ pipeline: [{$limit: 1}, {$match: {x: 20}}],
+ expectedStages: ["COLLSCAN", "LIMIT"],
+ optimizedAwayStages: ["$limit"],
});
+
+// $match, $project, $limit can be optimized away when the projection is covered.
+assertPipelineDoesNotUseAggregation({
+ pipeline: [{$match: {x: {$gte: 20}}}, {$project: {_id: 0, x: 1}}, {$limit: 1}],
+ expectedStages: ["IXSCAN", "PROJECTION_COVERED", "LIMIT"],
+ expectedResult: [{x: 20}],
+});
+
+// $match, $project, and $limit cannot be optimized away when the projection is not covered. But the
+// $limit can be pushed down into the query layer.
+assertPipelineUsesAggregation({
+ pipeline: [{$match: {x: {$gte: 20}}}, {$project: {_id: 0, x: 1, y: 1}}, {$limit: 1}],
+ expectedStages: ["IXSCAN", "FETCH", "LIMIT"],
+ expectedResult: [{x: 20}],
+ optimizedAwayStages: ["$limit"],
+});
+
+// $match, $project, $limit, $sort cannot be optimized away because the $limit comes before the
+// $sort.
+assertPipelineUsesAggregation({
+ pipeline: [{$match: {x: {$gte: 20}}}, {$project: {_id: 0, x: 1}}, {$limit: 1}, {$sort: {x: 1}}],
+ expectedStages: ["IXSCAN", "PROJECTION_COVERED", "LIMIT"],
+ expectedResult: [{x: 20}],
+ optimizedAwayStages: ["$project", "$limit"],
+});
+
+// $match, $sort, $limit can be optimized away.
+assertPipelineDoesNotUseAggregation({
+ pipeline: [{$match: {x: {$gte: 20}}}, {$sort: {x: -1}}, {$limit: 2}],
+ expectedStages: ["IXSCAN", "LIMIT"],
+ expectedResult: [{_id: 3, x: 30}, {_id: 2, x: 20}],
+});
+
+// $match, $sort, $limit, $project can be optimized away.
+assertPipelineDoesNotUseAggregation({
+ pipeline:
+ [{$match: {x: {$gte: 20}}}, {$sort: {x: -1}}, {$limit: 2}, {$project: {_id: 0, x: 1}}],
+ expectedStages: ["IXSCAN", "PROJECTION_COVERED", "LIMIT"],
+ expectedResult: [{x: 30}, {x: 20}],
+});
+
+// $match, $sort, $project, $limit can be optimized away.
+assertPipelineDoesNotUseAggregation({
+ pipeline:
+ [{$match: {x: {$gte: 20}}}, {$sort: {x: -1}}, {$project: {_id: 0, x: 1}}, {$limit: 2}],
+ expectedStages: ["IXSCAN", "PROJECTION_COVERED", "LIMIT"],
+ expectedResult: [{x: 30}, {x: 20}],
+});
+
+// $match, $sort, $limit, $project can be optimized away, where limits must swap and combine to
+// enable pushdown.
+assertPipelineDoesNotUseAggregation({
+ pipeline: [
+ {$match: {x: {$gte: 20}}},
+ {$sort: {x: -1}},
+ {$limit: 3},
+ {$project: {_id: 0, x: 1}},
+ {$limit: 2}
+ ],
+ expectedStages: ["IXSCAN", "PROJECTION_COVERED", "LIMIT"],
+ expectedResult: [{x: 30}, {x: 20}],
+});
+
+// For $sort, $limit, $group, the $sort and $limit can be pushed down, but $group cannot.
+assertPipelineUsesAggregation({
+ pipeline: [{$sort: {x: 1}}, {$limit: 2}, {$group: {_id: null, s: {$sum: "$x"}}}],
+ expectedStages: ["IXSCAN", "PROJECTION_COVERED", "LIMIT"],
+ expectedResult: [{_id: null, s: 30}],
+ optimizedAwayStages: ["$sort", "$limit"],
+});
+
+// Test that $limit can be pushed down before a group, but it prohibits the DISTINCT_SCAN
+// optimization.
+assertPipelineUsesAggregation({
+ pipeline: [{$group: {_id: "$x"}}],
+ expectedStages: ["DISTINCT_SCAN", "PROJECTION_COVERED"],
+ expectedResult: [{_id: 10}, {_id: 20}, {_id: 30}],
+});
+assertPipelineUsesAggregation({
+ pipeline: [{$limit: 2}, {$group: {_id: "$x"}}],
+ expectedStages: ["COLLSCAN", "LIMIT"],
+ optimizedAwayStages: ["$limit"],
+});
+assertPipelineUsesAggregation({
+ pipeline: [{$sort: {x: 1}}, {$group: {_id: "$x"}}],
+ expectedStages: ["DISTINCT_SCAN", "PROJECTION_COVERED"],
+ expectedResult: [{_id: 10}, {_id: 20}, {_id: 30}],
+ optimizedAwayStages: ["$sort"],
+});
+assertPipelineUsesAggregation({
+ pipeline: [{$sort: {x: 1}}, {$limit: 2}, {$group: {_id: "$x"}}],
+ expectedResult: [{_id: 10}, {_id: 20}],
+ expectedStages: ["IXSCAN", "LIMIT"],
+ optimizedAwayStages: ["$sort", "$limit"],
+});
+
+// $limit after a group has no effect on our ability to produce a DISTINCT_SCAN plan.
+assertPipelineUsesAggregation({
+ pipeline: [{$group: {_id: "$x"}}, {$sort: {_id: 1}}, {$limit: 2}],
+ expectedStages: ["DISTINCT_SCAN", "PROJECTION_COVERED"],
+ expectedResult: [{_id: 10}, {_id: 20}],
+});
+
+// For $limit, $project, $limit, we can optimize away both $limit stages.
+let pipeline = [{$match: {x: {$gte: 0}}}, {$limit: 2}, {$project: {_id: 0, x: 1}}, {$limit: 1}];
+assertPipelineDoesNotUseAggregation({
+ pipeline: pipeline,
+ expectedStages: ["IXSCAN", "PROJECTION_COVERED", "LIMIT"],
+});
+// Make sure that we end up using the smaller limit of 1.
+let explain = coll.explain().aggregate(pipeline);
+let limitStage = getAggPlanStage(explain, "LIMIT");
+assert.neq(null, limitStage, explain);
+assert.eq(1, limitStage.limitAmount, explain);
+
+// We can optimize away interleaved $limit and $skip after a project. The $limits can be collapsed
+// into a single $limit:35 prior to the $skip stages. We currently do not push down $skip into the
+// PlanStage layer (see SERVER-40909), which prevents this pipeline from being entirely optimized
+// away.
+pipeline = [
+ {$match: {x: {$gte: 0}}},
+ {$project: {_id: 0, x: 1}},
+ {$skip: 20},
+ {$limit: 15},
+ {$skip: 10},
+ {$limit: 7}
+];
+assertPipelineUsesAggregation({
+ pipeline: pipeline,
+ expectedStages: ["IXSCAN", "PROJECTION_COVERED", "LIMIT"],
+ optimizedAwayStages: ["$match", "$limit"],
+});
+explain = coll.explain().aggregate(pipeline);
+limitStage = getAggPlanStage(explain, "LIMIT");
+assert.neq(null, limitStage, explain);
+assert.eq(35, limitStage.limitAmount, explain);
+let skipStage = getAggPlanStage(explain, "$skip");
+assert.neq(null, skipStage, explain);
+assert.eq(30, skipStage.$skip, explain);
+
assert.commandWorked(coll.dropIndexes());
// getMore cases.
diff --git a/jstests/aggregation/sources/sort/explain_sort.js b/jstests/aggregation/sources/sort/explain_sort.js
index ac93acc11ab..f6d22e9e719 100644
--- a/jstests/aggregation/sources/sort/explain_sort.js
+++ b/jstests/aggregation/sources/sort/explain_sort.js
@@ -13,7 +13,7 @@ const kNumDocs = 10;
// Return whether or not explain() was successful and contained the appropriate fields given the
// requested verbosity.
-function checkResults(results, verbosity) {
+function checkResults(results, verbosity, expectedNumResults = kNumDocs) {
let cursorSubdocs = getAggPlanStages(results, "$cursor");
let nReturned = 0;
let nExamined = 0;
@@ -22,15 +22,15 @@ function checkResults(results, verbosity) {
const result = stageResult.$cursor;
if (verbosity === "queryPlanner") {
assert(!result.hasOwnProperty("executionStats"), tojson(results));
- } else {
- nReturned += result.executionStats.nReturned;
- nExamined += result.executionStats.totalDocsExamined;
+ } else if (cursorSubdocs.length === 1) {
+ // If there was a single shard, then we can assert that 'nReturned' and
+ // 'totalDocsExamined' are as expected. If there are multiple shards, these assertions
+ // might not hold, since each shard enforces the limit on its own and then the merging
+ // node enforces the limit again to obtain the final result set.
+ assert.eq(result.executionStats.nReturned, expectedNumResults, tojson(results));
+ assert.eq(result.executionStats.totalDocsExamined, expectedNumResults, tojson(results));
}
}
- if (verbosity != "queryPlanner") {
- assert.eq(nReturned, kNumDocs, tojson(results));
- assert.eq(nExamined, kNumDocs, tojson(results));
- }
}
for (let i = 0; i < kNumDocs; i++) {
@@ -53,9 +53,9 @@ for (let verbosity of ["queryPlanner", "executionStats", "allPlansExecution"]) {
checkResults(coll.explain(verbosity).aggregate(pipeline), verbosity);
pipeline = [{$project: {a: 1}}, {$limit: 5}, {$sort: {a: 1}}];
- checkResults(coll.explain(verbosity).aggregate(pipeline), verbosity);
+ checkResults(coll.explain(verbosity).aggregate(pipeline), verbosity, 5);
pipeline = [{$project: {_id: 1}}, {$limit: 5}];
- checkResults(coll.explain(verbosity).aggregate(pipeline), verbosity);
+ checkResults(coll.explain(verbosity).aggregate(pipeline), verbosity, 5);
}
})();