diff options
author | Justin Seyster <justin.seyster@mongodb.com> | 2018-06-22 18:36:03 -0400 |
---|---|---|
committer | Justin Seyster <justin.seyster@mongodb.com> | 2018-10-15 17:33:40 -0400 |
commit | 8e540c0b6db93ce994cc548f000900bdc740f80a (patch) | |
tree | e4a63314f67158d2d7a784a029f04e0d28865b88 | |
parent | f143314e5d5b1021ed11ab2a2c57d0e6ada24926 (diff) | |
download | mongo-8e540c0b6db93ce994cc548f000900bdc740f80a.tar.gz |
SERVER-13946 Put skip stages before fetch stages.
(cherry picked from commit 69f3e89f6921fc4ff2b5413952eeb517af69bb83)
-rw-r--r-- | jstests/core/add_skip_stage_before_fetch.js | 63 | ||||
-rw-r--r-- | src/mongo/db/query/planner_analysis.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test.cpp | 59 |
3 files changed, 122 insertions(+), 14 deletions(-)
diff --git a/jstests/core/add_skip_stage_before_fetch.js b/jstests/core/add_skip_stage_before_fetch.js new file mode 100644 index 00000000000..064cd1ac5e7 --- /dev/null +++ b/jstests/core/add_skip_stage_before_fetch.js @@ -0,0 +1,63 @@ +// SERVER-13946: When possible, place skip stages before fetch stages to avoid unnecessarily +// fetching documents that will be immediately discarded. + +// The skip operation in a sharded query always occurs in the mongoS, so this test doesn't make +// sense on a sharded collection. +// @tags: [assumes_unsharded_collection] + +(function() { + "use strict"; + + load("jstests/libs/analyze_plan.js"); + + const coll = db.add_skip_stage_before_fetch; + + coll.drop(); + const testIndex = {a: 1, b: 1, c: 1}; + assert.commandWorked(coll.createIndex(testIndex)); + + const bulk = coll.initializeUnorderedBulkOp(); + for (let i = 0; i < 10000; i++) { + bulk.insert({ + a: i % 2, + b: i % 4, + c: Math.floor(Math.random() * 1000), + d: Math.floor(Math.random() * 1000) + }); + } + assert.writeOK(bulk.execute()); + + // The {a: 0, b: 2} query will match exactly one quarter of the documents in the collection: + // 2500 in total. In the test queries below, we skip the first 2400, returning exactly 100 + // documents. + + // This find can be computed using the index, so we should only need to fetch the 100 documents + // that get returned to the client after skipping the first 2400. + let explainResult = + coll.find({a: 0, b: 2}).hint(testIndex).skip(2400).explain("executionStats"); + assert.gte(explainResult.executionStats.totalKeysExamined, 2500); + assert.eq(explainResult.executionStats.totalDocsExamined, 100); + + // This sort can also be computed using the index. 
+ explainResult = + coll.find({a: 0, b: 2}).hint(testIndex).sort({c: 1}).skip(2400).explain("executionStats"); + assert.gte(explainResult.executionStats.totalKeysExamined, 2500); + assert.eq(explainResult.executionStats.totalDocsExamined, 100); + + // This query is covered by the index, so there should be no fetch at all. + explainResult = coll.find({a: 0, b: 2}, {_id: 0, a: 1}) + .hint(testIndex) + .sort({c: 1}) + .skip(2400) + .explain("executionStats"); + assert.gte(explainResult.executionStats.totalKeysExamined, 2500); + assert.eq(explainResult.executionStats.totalDocsExamined, 0); + assert(isIndexOnly(explainResult.queryPlanner.winningPlan)); + + // This sort requires a field that is not in the index, so we should be fetching all 2500 + // documents that match the find predicate. + explainResult = + coll.find({a: 0, b: 2}).hint(testIndex).sort({d: 1}).skip(2400).explain("executionStats"); + assert.gte(explainResult.executionStats.totalKeysExamined, 2500); + assert.eq(explainResult.executionStats.totalDocsExamined, 2500); +})(); diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp index cd11da91262..fe174018d47 100644 --- a/src/mongo/db/query/planner_analysis.cpp +++ b/src/mongo/db/query/planner_analysis.cpp @@ -723,6 +723,13 @@ QuerySolution* QueryPlannerAnalysis::analyzeDataAccess( } } + if (qr.getSkip()) { + auto skip = std::make_unique<SkipNode>(); + skip->skip = *qr.getSkip(); + skip->children.push_back(solnRoot.release()); + solnRoot = std::move(skip); + } + // Project the results. 
if (NULL != query.getProj()) { LOG(5) << "PROJECTION: Current plan is:\n" << redact(solnRoot->toString()); @@ -829,13 +836,6 @@ QuerySolution* QueryPlannerAnalysis::analyzeDataAccess( } } - if (qr.getSkip()) { - SkipNode* skip = new SkipNode(); - skip->skip = *qr.getSkip(); - skip->children.push_back(solnRoot.release()); - solnRoot.reset(skip); - } - // When there is both a blocking sort and a limit, the limit will // be enforced by the blocking sort. // Otherwise, we need to limit the results in the case of a hard limit diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp index 4293806ae38..8f6b0293a84 100644 --- a/src/mongo/db/query/query_planner_test.cpp +++ b/src/mongo/db/query/query_planner_test.cpp @@ -425,10 +425,57 @@ TEST_F(QueryPlannerTest, BasicSkipWithIndex) { ASSERT_EQUALS(getNumSolutions(), 2U); assertSolutionExists("{skip: {n: 8, node: {cscan: {dir: 1, filter: {a: 5}}}}}"); assertSolutionExists( - "{skip: {n: 8, node: {fetch: {filter: null, node: " + "{fetch: {filter: null, node: {skip: {n: 8, node: " "{ixscan: {filter: null, pattern: {a: 1, b: 1}}}}}}}"); } +TEST_F(QueryPlannerTest, CoveredSkipWithIndex) { + addIndex(fromjson("{a: 1, b: 1}")); + + runQuerySortProjSkipNToReturn( + fromjson("{a: 5}"), BSONObj(), fromjson("{_id: 0, a: 1, b: 1}"), 8, 0); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists( + "{proj: {spec: {_id: 0, a: 1, b: 1}, node: " + "{skip: {n: 8, node: {cscan: {dir: 1, filter: {a: 5}}}}}}}"); + assertSolutionExists( + "{proj: {spec: {_id: 0, a: 1, b: 1}, " + "node: {skip: {n: 8, node: {ixscan: {filter: null, pattern: {a: 1, b: 1}}}}}}}"); +} + +TEST_F(QueryPlannerTest, SkipEvaluatesAfterFetchWithPredicate) { + addIndex(fromjson("{a: 1}")); + + runQuerySkipNToReturn(fromjson("{a: 5, b: 7}"), 8, 0); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{skip: {n: 8, node: {cscan: {dir: 1, filter: {a: 5, b: 7}}}}}"); + + // When a plan includes a fetch with no 
predicate, the skip should execute first, so we avoid + // fetching a document that we will always discard. When the fetch does have a predicate (as in + // this case), however, that optimization would be invalid; we need to fetch the document and + // evaluate the filter to determine if the document should count towards the number of skipped + // documents. + assertSolutionExists( + "{skip: {n: 8, node: {fetch: {filter: {b: 7}, node: " + "{ixscan: {filter: null, pattern: {a: 1}}}}}}}"); +} + +TEST_F(QueryPlannerTest, SkipEvaluatesBeforeFetchForIndexedOr) { + addIndex(fromjson("{a: 1}")); + + runQuerySkipNToReturn(fromjson("{$or: [{a: 5}, {a: 7}]}"), 8, 0); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists( + "{skip: {n: 8, node: " + "{cscan: {dir: 1, filter: {$or: [{a: 5}, {a: 7}]}}}}}"); + assertSolutionExists( + "{fetch: {filter: null, node: {skip: {n: 8, node: " + "{ixscan: {filter: null, pattern: {a: 1}}}}}}}"); +} + TEST_F(QueryPlannerTest, BasicLimitNoIndex) { addIndex(BSON("a" << 1)); @@ -481,9 +528,8 @@ TEST_F(QueryPlannerTest, SkipAndLimit) { "{limit: {n: 2, node: {skip: {n: 7, node: " "{cscan: {dir: 1, filter: {x: {$lte: 4}}}}}}}}"); assertSolutionExists( - "{limit: {n: 2, node: {skip: {n: 7, node: {fetch: " - "{filter: null, node: {ixscan: " - "{filter: null, pattern: {x: 1}}}}}}}}}"); + "{limit: {n: 2, node: {fetch: {filter: null, node: " + "{skip: {n: 7, node: {ixscan: {filter: null, pattern: {x: 1}}}}}}}}}"); } TEST_F(QueryPlannerTest, SkipAndSoftLimit) { @@ -496,9 +542,8 @@ TEST_F(QueryPlannerTest, SkipAndSoftLimit) { "{skip: {n: 7, node: " "{cscan: {dir: 1, filter: {x: {$lte: 4}}}}}}"); assertSolutionExists( - "{skip: {n: 7, node: {fetch: " - "{filter: null, node: {ixscan: " - "{filter: null, pattern: {x: 1}}}}}}}"); + "{fetch: {filter: null, node: {skip: {n: 7, node: " + "{ixscan: {filter: null, pattern: {x: 1}}}}}}}"); } // |