diff options
author | Justin Seyster <justin.seyster@mongodb.com> | 2018-06-22 18:36:03 -0400 |
---|---|---|
committer | Justin Seyster <justin.seyster@mongodb.com> | 2018-10-15 17:33:40 -0400 |
commit | 8e540c0b6db93ce994cc548f000900bdc740f80a (patch) | |
tree | e4a63314f67158d2d7a784a029f04e0d28865b88 | |
parent | f143314e5d5b1021ed11ab2a2c57d0e6ada24926 (diff) | |
download | mongo-8e540c0b6db93ce994cc548f000900bdc740f80a.tar.gz |
SERVER-13946 Put skip stages before fetch stages.
(cherry picked from commit 69f3e89f6921fc4ff2b5413952eeb517af69bb83)
-rw-r--r-- | jstests/core/add_skip_stage_before_fetch.js | 63 | ||||
-rw-r--r-- | src/mongo/db/query/planner_analysis.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner_test.cpp | 59 |
3 files changed, 122 insertions(+), 14 deletions(-)
diff --git a/jstests/core/add_skip_stage_before_fetch.js b/jstests/core/add_skip_stage_before_fetch.js new file mode 100644 index 00000000000..064cd1ac5e7 --- /dev/null +++ b/jstests/core/add_skip_stage_before_fetch.js @@ -0,0 +1,63 @@ +// SERVER-13946: When possible, place skip stages before fetch stages to avoid unnecessarily +// fetching documents that will be immediately discarded. + +// The skip operation in a sharded query always occurs in the mongoS, so this test doesn't make +// sense on a sharded collection. +// @tags: [assumes_unsharded_collection] + +(function() { + "use strict"; + + load("jstests/libs/analyze_plan.js"); + + const coll = db.add_skip_stage_before_fetch; + + coll.drop(); + const testIndex = {a: 1, b: 1, c: 1}; + assert.commandWorked(coll.createIndex(testIndex)); + + const bulk = coll.initializeUnorderedBulkOp(); + for (let i = 0; i < 10000; i++) { + bulk.insert({ + a: i % 2, + b: i % 4, + c: Math.floor(Math.random() * 1000), + d: Math.floor(Math.random() * 1000) + }); + } + assert.writeOK(bulk.execute()); + + // The {a: 0, b: 2} query will match exactly one quarter of the documents in the collection: + // 2500 in total. In the test queries below, we skip the first 2400, returning exactly 100 + // documents. + + // This find can be computed using the index, so we should only need to fetch the 100 documents + // that get returned to the client after skipping the first 2400. + let explainResult = + coll.find({a: 0, b: 2}).hint(testIndex).skip(2400).explain("executionStats"); + assert.gte(explainResult.executionStats.totalKeysExamined, 2500); + assert.eq(explainResult.executionStats.totalDocsExamined, 100); + + // This sort can also be computed using the index. 
+ explainResult = + coll.find({a: 0, b: 2}).hint(testIndex).sort({c: 1}).skip(2400).explain("executionStats"); + assert.gte(explainResult.executionStats.totalKeysExamined, 2500); + assert.eq(explainResult.executionStats.totalDocsExamined, 100); + + // This query is covered by the index, so there should be no fetch at all. + explainResult = coll.find({a: 0, b: 2}, {_id: 0, a: 1}) + .hint(testIndex) + .sort({c: 1}) + .skip(2400) + .explain("executionStats"); + assert.gte(explainResult.executionStats.totalKeysExamined, 2500); + assert.eq(explainResult.executionStats.totalDocsExamined, 0); + assert(isIndexOnly(explainResult.queryPlanner.winningPlan)); + + // This sort requires a field that is not in the index, so we should be fetching all 2500 + // documents that match the find predicate. + explainResult = + coll.find({a: 0, b: 2}).hint(testIndex).sort({d: 1}).skip(2400).explain("executionStats"); + assert.gte(explainResult.executionStats.totalKeysExamined, 2500); + assert.eq(explainResult.executionStats.totalDocsExamined, 2500); +})(); diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp index cd11da91262..fe174018d47 100644 --- a/src/mongo/db/query/planner_analysis.cpp +++ b/src/mongo/db/query/planner_analysis.cpp @@ -723,6 +723,13 @@ QuerySolution* QueryPlannerAnalysis::analyzeDataAccess( } } + if (qr.getSkip()) { + auto skip = std::make_unique<SkipNode>(); + skip->skip = *qr.getSkip(); + skip->children.push_back(solnRoot.release()); + solnRoot = std::move(skip); + } + // Project the results. 
if (NULL != query.getProj()) { LOG(5) << "PROJECTION: Current plan is:\n" << redact(solnRoot->toString()); @@ -829,13 +836,6 @@ QuerySolution* QueryPlannerAnalysis::analyzeDataAccess( } } - if (qr.getSkip()) { - SkipNode* skip = new SkipNode(); - skip->skip = *qr.getSkip(); - skip->children.push_back(solnRoot.release()); - solnRoot.reset(skip); - } - // When there is both a blocking sort and a limit, the limit will // be enforced by the blocking sort. // Otherwise, we need to limit the results in the case of a hard limit diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp index 4293806ae38..8f6b0293a84 100644 --- a/src/mongo/db/query/query_planner_test.cpp +++ b/src/mongo/db/query/query_planner_test.cpp @@ -425,10 +425,57 @@ TEST_F(QueryPlannerTest, BasicSkipWithIndex) { ASSERT_EQUALS(getNumSolutions(), 2U); assertSolutionExists("{skip: {n: 8, node: {cscan: {dir: 1, filter: {a: 5}}}}}"); assertSolutionExists( - "{skip: {n: 8, node: {fetch: {filter: null, node: " + "{fetch: {filter: null, node: {skip: {n: 8, node: " "{ixscan: {filter: null, pattern: {a: 1, b: 1}}}}}}}"); } +TEST_F(QueryPlannerTest, CoveredSkipWithIndex) { + addIndex(fromjson("{a: 1, b: 1}")); + + runQuerySortProjSkipNToReturn( + fromjson("{a: 5}"), BSONObj(), fromjson("{_id: 0, a: 1, b: 1}"), 8, 0); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists( + "{proj: {spec: {_id: 0, a: 1, b: 1}, node: " + "{skip: {n: 8, node: {cscan: {dir: 1, filter: {a: 5}}}}}}}"); + assertSolutionExists( + "{proj: {spec: {_id: 0, a: 1, b: 1}, " + "node: {skip: {n: 8, node: {ixscan: {filter: null, pattern: {a: 1, b: 1}}}}}}}"); +} + +TEST_F(QueryPlannerTest, SkipEvaluatesAfterFetchWithPredicate) { + addIndex(fromjson("{a: 1}")); + + runQuerySkipNToReturn(fromjson("{a: 5, b: 7}"), 8, 0); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists("{skip: {n: 8, node: {cscan: {dir: 1, filter: {a: 5, b: 7}}}}}"); + + // When a plan includes a fetch with no 
predicate, the skip should execute first, so we avoid + // fetching a document that we will always discard. When the fetch does have a predicate (as in + // this case), however, that optimization would be invalid; we need to fetch the document and + // evaluate the filter to determine if the document should count towards the number of skipped + // documents. + assertSolutionExists( + "{skip: {n: 8, node: {fetch: {filter: {b: 7}, node: " + "{ixscan: {filter: null, pattern: {a: 1}}}}}}}"); +} + +TEST_F(QueryPlannerTest, SkipEvaluatesBeforeFetchForIndexedOr) { + addIndex(fromjson("{a: 1}")); + + runQuerySkipNToReturn(fromjson("{$or: [{a: 5}, {a: 7}]}"), 8, 0); + + ASSERT_EQUALS(getNumSolutions(), 2U); + assertSolutionExists( + "{skip: {n: 8, node: " + "{cscan: {dir: 1, filter: {$or: [{a: 5}, {a: 7}]}}}}}"); + assertSolutionExists( + "{fetch: {filter: null, node: {skip: {n: 8, node: " + "{ixscan: {filter: null, pattern: {a: 1}}}}}}}"); +} + TEST_F(QueryPlannerTest, BasicLimitNoIndex) { addIndex(BSON("a" << 1)); @@ -481,9 +528,8 @@ TEST_F(QueryPlannerTest, SkipAndLimit) { "{limit: {n: 2, node: {skip: {n: 7, node: " "{cscan: {dir: 1, filter: {x: {$lte: 4}}}}}}}}"); assertSolutionExists( - "{limit: {n: 2, node: {skip: {n: 7, node: {fetch: " - "{filter: null, node: {ixscan: " - "{filter: null, pattern: {x: 1}}}}}}}}}"); + "{limit: {n: 2, node: {fetch: {filter: null, node: " + "{skip: {n: 7, node: {ixscan: {filter: null, pattern: {x: 1}}}}}}}}}"); } TEST_F(QueryPlannerTest, SkipAndSoftLimit) { @@ -496,9 +542,8 @@ TEST_F(QueryPlannerTest, SkipAndSoftLimit) { "{skip: {n: 7, node: " "{cscan: {dir: 1, filter: {x: {$lte: 4}}}}}}"); assertSolutionExists( - "{skip: {n: 7, node: {fetch: " - "{filter: null, node: {ixscan: " - "{filter: null, pattern: {x: 1}}}}}}}"); + "{fetch: {filter: null, node: {skip: {n: 7, node: " + "{ixscan: {filter: null, pattern: {x: 1}}}}}}}"); } // |