summary refs log tree commitdiff
diff options
context:
space:
mode:
authorJustin Seyster <justin.seyster@mongodb.com>2018-06-22 18:36:03 -0400
committerJustin Seyster <justin.seyster@mongodb.com>2018-10-15 17:33:40 -0400
commit8e540c0b6db93ce994cc548f000900bdc740f80a (patch)
treee4a63314f67158d2d7a784a029f04e0d28865b88
parentf143314e5d5b1021ed11ab2a2c57d0e6ada24926 (diff)
downloadmongo-8e540c0b6db93ce994cc548f000900bdc740f80a.tar.gz
SERVER-13946 Put skip stages before fetch stages.
(cherry picked from commit 69f3e89f6921fc4ff2b5413952eeb517af69bb83)
-rw-r--r--jstests/core/add_skip_stage_before_fetch.js63
-rw-r--r--src/mongo/db/query/planner_analysis.cpp14
-rw-r--r--src/mongo/db/query/query_planner_test.cpp59
3 files changed, 122 insertions, 14 deletions
diff --git a/jstests/core/add_skip_stage_before_fetch.js b/jstests/core/add_skip_stage_before_fetch.js
new file mode 100644
index 00000000000..064cd1ac5e7
--- /dev/null
+++ b/jstests/core/add_skip_stage_before_fetch.js
@@ -0,0 +1,63 @@
+// SERVER-13946: When possible, place skip stages before fetch stages to avoid unnecessarily
+// fetching documents that will be immediately discarded.
+
+// The skip operation in a sharded query always occurs in the mongoS, so this test doesn't make
+// sense on a sharded collection.
+// @tags: [assumes_unsharded_collection]
+
+(function() {
+ "use strict";
+
+ load("jstests/libs/analyze_plan.js");
+
+ const coll = db.add_skip_stage_before_fetch;
+
+ coll.drop();
+ const testIndex = {a: 1, b: 1, c: 1};
+ assert.commandWorked(coll.createIndex(testIndex));
+
+ const bulk = coll.initializeUnorderedBulkOp();
+ for (let i = 0; i < 10000; i++) {
+ bulk.insert({
+ a: i % 2,
+ b: i % 4,
+ c: Math.floor(Math.random() * 1000),
+ d: Math.floor(Math.random() * 1000)
+ });
+ }
+ assert.writeOK(bulk.execute());
+
+ // The {a: 0, b: 2} query will match exactly one quarter of the documents in the collection:
+ // 2500 in total. In the test queries below, we skip the first 2400, returning exactly 100
+ // documents.
+
+ // This find can be computed using the index, so we should only need to fetch the 100 documents
+ // that get returned to the client after skipping the first 2400.
+ let explainResult =
+ coll.find({a: 0, b: 2}).hint(testIndex).skip(2400).explain("executionStats");
+ assert.gte(explainResult.executionStats.totalKeysExamined, 2500);
+ assert.eq(explainResult.executionStats.totalDocsExamined, 100);
+
+ // This sort can also be computed using the index.
+ explainResult =
+ coll.find({a: 0, b: 2}).hint(testIndex).sort({c: 1}).skip(2400).explain("executionStats");
+ assert.gte(explainResult.executionStats.totalKeysExamined, 2500);
+ assert.eq(explainResult.executionStats.totalDocsExamined, 100);
+
+ // This query is covered by the index, so there should be no fetch at all.
+ explainResult = coll.find({a: 0, b: 2}, {_id: 0, a: 1})
+ .hint(testIndex)
+ .sort({c: 1})
+ .skip(2400)
+ .explain("executionStats");
+ assert.gte(explainResult.executionStats.totalKeysExamined, 2500);
+ assert.eq(explainResult.executionStats.totalDocsExamined, 0);
+ assert(isIndexOnly(explainResult.queryPlanner.winningPlan));
+
+ // This sort requires a field that is not in the index, so we should be fetching all 2500
+ // documents that match the find predicate.
+ explainResult =
+ coll.find({a: 0, b: 2}).hint(testIndex).sort({d: 1}).skip(2400).explain("executionStats");
+ assert.gte(explainResult.executionStats.totalKeysExamined, 2500);
+ assert.eq(explainResult.executionStats.totalDocsExamined, 2500);
+})();
diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp
index cd11da91262..fe174018d47 100644
--- a/src/mongo/db/query/planner_analysis.cpp
+++ b/src/mongo/db/query/planner_analysis.cpp
@@ -723,6 +723,13 @@ QuerySolution* QueryPlannerAnalysis::analyzeDataAccess(
}
}
+ if (qr.getSkip()) {
+ auto skip = std::make_unique<SkipNode>();
+ skip->skip = *qr.getSkip();
+ skip->children.push_back(solnRoot.release());
+ solnRoot = std::move(skip);
+ }
+
// Project the results.
if (NULL != query.getProj()) {
LOG(5) << "PROJECTION: Current plan is:\n" << redact(solnRoot->toString());
@@ -829,13 +836,6 @@ QuerySolution* QueryPlannerAnalysis::analyzeDataAccess(
}
}
- if (qr.getSkip()) {
- SkipNode* skip = new SkipNode();
- skip->skip = *qr.getSkip();
- skip->children.push_back(solnRoot.release());
- solnRoot.reset(skip);
- }
-
// When there is both a blocking sort and a limit, the limit will
// be enforced by the blocking sort.
// Otherwise, we need to limit the results in the case of a hard limit
diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp
index 4293806ae38..8f6b0293a84 100644
--- a/src/mongo/db/query/query_planner_test.cpp
+++ b/src/mongo/db/query/query_planner_test.cpp
@@ -425,10 +425,57 @@ TEST_F(QueryPlannerTest, BasicSkipWithIndex) {
ASSERT_EQUALS(getNumSolutions(), 2U);
assertSolutionExists("{skip: {n: 8, node: {cscan: {dir: 1, filter: {a: 5}}}}}");
assertSolutionExists(
- "{skip: {n: 8, node: {fetch: {filter: null, node: "
+ "{fetch: {filter: null, node: {skip: {n: 8, node: "
"{ixscan: {filter: null, pattern: {a: 1, b: 1}}}}}}}");
}
+TEST_F(QueryPlannerTest, CoveredSkipWithIndex) {
+ addIndex(fromjson("{a: 1, b: 1}"));
+
+ runQuerySortProjSkipNToReturn(
+ fromjson("{a: 5}"), BSONObj(), fromjson("{_id: 0, a: 1, b: 1}"), 8, 0);
+
+ ASSERT_EQUALS(getNumSolutions(), 2U);
+ assertSolutionExists(
+ "{proj: {spec: {_id: 0, a: 1, b: 1}, node: "
+ "{skip: {n: 8, node: {cscan: {dir: 1, filter: {a: 5}}}}}}}");
+ assertSolutionExists(
+ "{proj: {spec: {_id: 0, a: 1, b: 1}, "
+ "node: {skip: {n: 8, node: {ixscan: {filter: null, pattern: {a: 1, b: 1}}}}}}}");
+}
+
+TEST_F(QueryPlannerTest, SkipEvaluatesAfterFetchWithPredicate) {
+ addIndex(fromjson("{a: 1}"));
+
+ runQuerySkipNToReturn(fromjson("{a: 5, b: 7}"), 8, 0);
+
+ ASSERT_EQUALS(getNumSolutions(), 2U);
+ assertSolutionExists("{skip: {n: 8, node: {cscan: {dir: 1, filter: {a: 5, b: 7}}}}}");
+
+ // When a plan includes a fetch with no predicate, the skip should execute first, so we avoid
+ // fetching a document that we will always discard. When the fetch does have a predicate (as in
+ // this case), however, that optimization would be invalid; we need to fetch the document and
+ // evaluate the filter to determine if the document should count towards the number of skipped
+ // documents.
+ assertSolutionExists(
+ "{skip: {n: 8, node: {fetch: {filter: {b: 7}, node: "
+ "{ixscan: {filter: null, pattern: {a: 1}}}}}}}");
+}
+
+TEST_F(QueryPlannerTest, SkipEvaluatesBeforeFetchForIndexedOr) {
+ addIndex(fromjson("{a: 1}"));
+
+ runQuerySkipNToReturn(fromjson("{$or: [{a: 5}, {a: 7}]}"), 8, 0);
+
+ ASSERT_EQUALS(getNumSolutions(), 2U);
+ assertSolutionExists(
+ "{skip: {n: 8, node: "
+ "{cscan: {dir: 1, filter: {$or: [{a: 5}, {a: 7}]}}}}}");
+ assertSolutionExists(
+ "{fetch: {filter: null, node: {skip: {n: 8, node: "
+ "{ixscan: {filter: null, pattern: {a: 1}}}}}}}");
+}
+
TEST_F(QueryPlannerTest, BasicLimitNoIndex) {
addIndex(BSON("a" << 1));
@@ -481,9 +528,8 @@ TEST_F(QueryPlannerTest, SkipAndLimit) {
"{limit: {n: 2, node: {skip: {n: 7, node: "
"{cscan: {dir: 1, filter: {x: {$lte: 4}}}}}}}}");
assertSolutionExists(
- "{limit: {n: 2, node: {skip: {n: 7, node: {fetch: "
- "{filter: null, node: {ixscan: "
- "{filter: null, pattern: {x: 1}}}}}}}}}");
+ "{limit: {n: 2, node: {fetch: {filter: null, node: "
+ "{skip: {n: 7, node: {ixscan: {filter: null, pattern: {x: 1}}}}}}}}}");
}
TEST_F(QueryPlannerTest, SkipAndSoftLimit) {
@@ -496,9 +542,8 @@ TEST_F(QueryPlannerTest, SkipAndSoftLimit) {
"{skip: {n: 7, node: "
"{cscan: {dir: 1, filter: {x: {$lte: 4}}}}}}");
assertSolutionExists(
- "{skip: {n: 7, node: {fetch: "
- "{filter: null, node: {ixscan: "
- "{filter: null, pattern: {x: 1}}}}}}}");
+ "{fetch: {filter: null, node: {skip: {n: 7, node: "
+ "{ixscan: {filter: null, pattern: {x: 1}}}}}}}");
}
//