SERVER-39191 Optimize count queries on sharded collections

When counts were made orphan-aware, the addition of the SHARDING_FILTER stage started preventing the COUNT_SCAN optimization on all sharded collections, leading to a significant performance loss on some workloads. However, there cannot be any orphans if the shard key is fully specified with equality predicates: a mongod will never have orphans for chunk ranges it owns. This patch changes the query system to omit the SHARDING_FILTER stage in cases where the query specifies the full shard key, regaining the lost performance. Closes #1369. Signed-off-by: Ian Boros <ian.boros@mongodb.com>
author: Sebastian Mayr <smayr@atlassian.com> 2020-10-14 18:10:01 -0400
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-10-15 16:31:09 +0000
commit: 14bfbd8833706912617c0c904cce9847fbbbd0f1 (patch)
tree: e8876ba38ae5c0d79f458f100a4e684e68207b3e
parent: 4772fe5bd1a4e66c5cc8177cdb5956e05b07b176 (diff)
download: mongo-14bfbd8833706912617c0c904cce9847fbbbd0f1.tar.gz
2 files changed, 123 insertions, 1 deletions
diff --git a/jstests/sharding/queries_elide_shard_filter.js b/jstests/sharding/queries_elide_shard_filter.js
new file mode 100644
index 00000000000..ab0fbb5ecad
--- /dev/null
+++ b/jstests/sharding/queries_elide_shard_filter.js
@@ -0,0 +1,108 @@
+//
+// Tests that queries in sharded collections will be properly optimized. In particular, queries
+// which specify the shard key fully with an equality predicate may omit a SHARDING_FILTER stage.
+//
+// @tags: [requires_fcv_49]
+
+(function() {
+"use strict";
+
+load("jstests/libs/analyze_plan.js");
+
+function assertShardFilter(explain) {
+    const filterStage = getPlanStage(explain.queryPlanner.winningPlan, "SHARDING_FILTER");
+    assert.eq(filterStage.stage, "SHARDING_FILTER");
+    const scanStage = filterStage.inputStage;
+    assert.contains(scanStage.stage, ["IXSCAN", "FETCH"]);
+}
+
+function assertNoShardFilter(explain) {
+    const filterStage = getPlanStage(explain.queryPlanner.winningPlan, "SHARDING_FILTER");
+    assert.eq(filterStage, null, explain);
+}
+
+function assertCountScan(explain) {
+    const countStage = getPlanStage(explain.queryPlanner.winningPlan, "COUNT_SCAN");
+    assert.eq(countStage.stage, "COUNT_SCAN");
+}
+
+const st = new ShardingTest({shards: 1});
+const coll = st.s0.getCollection("foo.bar");
+
+function createCollection(coll, shardKey) {
+    coll.drop();
+    assert.commandWorked(st.s0.adminCommand({shardCollection: coll.getFullName(), key: shardKey}));
+
+    assert.commandWorked(coll.insert({_id: true, a: true, b: true, c: true, d: true}));
+    assert.commandWorked(coll.createIndex({a: 1, b: 1}));
+    assert.commandWorked(coll.createIndex({b: 1, a: 1}));
+}
+
+assert.commandWorked(st.s0.adminCommand({enableSharding: coll.getDB().getName()}));
+
+jsTest.log('Tests with single shard key');
+createCollection(coll, {a: 1});
+
+// We're requesting a specific shard key, therefore we should optimize away SHARDING_FILTER
+// and use a cheaper COUNT_SCAN.
+let explain = assert.commandWorked(coll.explain('executionStats').count({a: true}));
+assertCountScan(explain);
+// Check this works with a subset of records as well.
+explain = assert.commandWorked(coll.explain('executionStats').count({a: true, b: true}));
+assertCountScan(explain);
+
+// Test that a find() query which specifies the entire shard key does not need a shard filter.
+explain = assert.commandWorked(coll.find({a: true}).explain());
+assertNoShardFilter(explain);
+explain = assert.commandWorked(coll.find({a: true, b: true}).explain());
+assertNoShardFilter(explain);
+
+// The shard key is not constrained by an equality predicate, so a sharding filter is needed.
+explain = assert.commandWorked(coll.explain('executionStats').count({a: {$in: [true, false]}}));
+assertShardFilter(explain);
+
+// The query is on a field that is not part of the shard key, so a sharding filter is needed.
+explain = assert.commandWorked(coll.explain('executionStats').count({b: true}));
+assertShardFilter(explain);
+
+jsTest.log('Tests with compound shard key');
+createCollection(coll, {a: 1, b: 1});
+
+explain = assert.commandWorked(coll.explain('executionStats').count({a: true}));
+assertShardFilter(explain);
+explain = assert.commandWorked(coll.explain('executionStats').count({a: true, b: true}));
+assertCountScan(explain);
+explain =
+    assert.commandWorked(coll.explain('executionStats').count({a: true, b: {$in: [true, false]}}));
+assertShardFilter(explain);
+
+explain = assert.commandWorked(coll.find({a: true}).explain());
+assertShardFilter(explain);
+explain = assert.commandWorked(coll.find({a: true, b: true}).explain());
+assertNoShardFilter(explain);
+explain = assert.commandWorked(coll.find({a: true, b: {$in: [true, false]}}).explain());
+assertShardFilter(explain);
+
+jsTest.log('Tests with hashed shard key');
+createCollection(coll, {a: 'hashed'});
+
+explain = assert.commandWorked(coll.explain('executionStats').count({a: true}));
+assertCountScan(explain);
+explain = assert.commandWorked(coll.explain('executionStats').count({a: true, b: true}));
+assertCountScan(explain);
+explain = assert.commandWorked(coll.explain('executionStats').count({a: {$in: [true, false]}}));
+assertShardFilter(explain);
+explain = assert.commandWorked(coll.explain('executionStats').count({b: true}));
+assertShardFilter(explain);
+
+explain = assert.commandWorked(coll.find({a: true}).explain());
+assertNoShardFilter(explain);
+explain = assert.commandWorked(coll.find({a: true, b: true}).explain());
+assertNoShardFilter(explain);
+explain = assert.commandWorked(coll.find({a: {$in: [true, false]}}).explain());
+assertShardFilter(explain);
+explain = assert.commandWorked(coll.find({b: true}).explain());
+assertShardFilter(explain);
+
+st.stop();
+})();
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index a44eba9ef71..c2da4d5ac05 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -305,7 +305,21 @@ void fillOutPlannerParams(OperationContext* opCtx,
         auto collDesc = CollectionShardingState::get(opCtx, canonicalQuery->nss())
                             ->getCollectionDescription(opCtx);
         if (collDesc.isSharded()) {
-            plannerParams->shardKey = collDesc.getKeyPattern();
+            const auto& keyPattern = collDesc.getKeyPattern();
+            ShardKeyPattern shardKeyPattern(keyPattern);
+
+            // If the shard key is specified exactly, the query is guaranteed to only target one
+            // shard. Shards cannot own orphans for the key ranges they own, so there is no need
+            // to include a shard filtering stage. By omitting the shard filter, it may be possible
+            // to get a more efficient plan (for example, a COUNT_SCAN may be used if the query is
+            // eligible).
+            const BSONObj extractedKey = shardKeyPattern.extractShardKeyFromQuery(*canonicalQuery);
+
+            if (extractedKey.isEmpty()) {
+                plannerParams->shardKey = keyPattern;
+            } else {
+                plannerParams->options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
+            }
         } else {
             // If there's no metadata don't bother w/the shard filter since we won't know what
             // the key pattern is anyway...
author	Sebastian Mayr <smayr@atlassian.com>	2020-10-14 18:10:01 -0400
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2020-10-15 16:31:09 +0000
commit	14bfbd8833706912617c0c904cce9847fbbbd0f1 (patch)
tree	e8876ba38ae5c0d79f458f100a4e684e68207b3e
parent	4772fe5bd1a4e66c5cc8177cdb5956e05b07b176 (diff)
download	mongo-14bfbd8833706912617c0c904cce9847fbbbd0f1.tar.gz