summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Mayr <smayr@atlassian.com>2020-10-14 18:10:01 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-10-15 16:31:09 +0000
commit14bfbd8833706912617c0c904cce9847fbbbd0f1 (patch)
treee8876ba38ae5c0d79f458f100a4e684e68207b3e
parent4772fe5bd1a4e66c5cc8177cdb5956e05b07b176 (diff)
downloadmongo-14bfbd8833706912617c0c904cce9847fbbbd0f1.tar.gz
SERVER-39191 Optimize count queries on sharded collections
When counts were made orphan-aware, the addition of the SHARDING_FILTER stage started preventing the COUNT_SCAN optimization on all sharded collections, leading to significant performance loss on some workloads. However, there cannot be any orphans if the shard key is fully specified: a mongod will never have orphans for chunk ranges it owns. This patch changes the query system to omit the SHARDING_FILTER stage in cases where the query specifies the full shard key, regaining lost performance. Closes #1369. Signed-off-by: Ian Boros <ian.boros@mongodb.com>
-rw-r--r--jstests/sharding/queries_elide_shard_filter.js108
-rw-r--r--src/mongo/db/query/get_executor.cpp16
2 files changed, 123 insertions, 1 deletions
diff --git a/jstests/sharding/queries_elide_shard_filter.js b/jstests/sharding/queries_elide_shard_filter.js
new file mode 100644
index 00000000000..ab0fbb5ecad
--- /dev/null
+++ b/jstests/sharding/queries_elide_shard_filter.js
@@ -0,0 +1,108 @@
+//
+// Tests that queries in sharded collections will be properly optimized. In particular, queries
+// which specify the shard key fully with an equality predicate may omit a SHARDING_FILTER stage.
+//
+// @tags: [requires_fcv_49]
+
+(function() {
+"use strict";
+
+load("jstests/libs/analyze_plan.js");
+
+function assertShardFilter(explain) {
+ const filterStage = getPlanStage(explain.queryPlanner.winningPlan, "SHARDING_FILTER");
+ assert.eq(filterStage.stage, "SHARDING_FILTER");
+ const scanStage = filterStage.inputStage;
+ assert.contains(scanStage.stage, ["IXSCAN", "FETCH"]);
+}
+
+function assertNoShardFilter(explain) {
+ const filterStage = getPlanStage(explain.queryPlanner.winningPlan, "SHARDING_FILTER");
+ assert.eq(filterStage, null, explain);
+}
+
+function assertCountScan(explain) {
+ const countStage = getPlanStage(explain.queryPlanner.winningPlan, "COUNT_SCAN");
+ assert.eq(countStage.stage, "COUNT_SCAN");
+}
+
+// Start a ShardingTest with a single shard and obtain a handle to the 'foo.bar' collection
+// through st.s0. Every query in this test is issued through that handle.
+const st = new ShardingTest({shards: 1});
+const coll = st.s0.getCollection("foo.bar");
+
+function createCollection(coll, shardKey) {
+ coll.drop();
+ assert.commandWorked(st.s0.adminCommand({shardCollection: coll.getFullName(), key: shardKey}));
+
+ assert.commandWorked(coll.insert({_id: true, a: true, b: true, c: true, d: true}));
+ assert.commandWorked(coll.createIndex({a: 1, b: 1}));
+ assert.commandWorked(coll.createIndex({b: 1, a: 1}));
+}
+
+// Sharding must be enabled on the database before createCollection() can shard the collection.
+assert.commandWorked(st.s0.adminCommand({enableSharding: coll.getDB().getName()}));
+
+jsTest.log('Tests with single shard key');
+createCollection(coll, {a: 1});
+
+// The query specifies the full shard key with an equality predicate, so the SHARDING_FILTER
+// stage should be omitted and the cheaper COUNT_SCAN used instead.
+let explain = assert.commandWorked(coll.explain('executionStats').count({a: true}));
+assertCountScan(explain);
+// The same holds when the query adds an equality predicate on a non-shard-key field.
+explain = assert.commandWorked(coll.explain('executionStats').count({a: true, b: true}));
+assertCountScan(explain);
+
+// Test that a find() query which specifies the entire shard key does not need a shard filter.
+explain = assert.commandWorked(coll.find({a: true}).explain());
+assertNoShardFilter(explain);
+explain = assert.commandWorked(coll.find({a: true, b: true}).explain());
+assertNoShardFilter(explain);
+
+// An $in predicate on the shard key is not an equality match, so a sharding filter is still
+// required.
+explain = assert.commandWorked(coll.explain('executionStats').count({a: {$in: [true, false]}}));
+assertShardFilter(explain);
+
+// The predicate is on a field that is not part of the shard key, so a sharding filter is still
+// required.
+explain = assert.commandWorked(coll.explain('executionStats').count({b: true}));
+assertShardFilter(explain);
+
+jsTest.log('Tests with compound shard key');
+createCollection(coll, {a: 1, b: 1});
+
+// Only a prefix of the compound shard key is specified, so a sharding filter is still required.
+explain = assert.commandWorked(coll.explain('executionStats').count({a: true}));
+assertShardFilter(explain);
+// Both shard key fields are specified with equality predicates, so a COUNT_SCAN can be used.
+explain = assert.commandWorked(coll.explain('executionStats').count({a: true, b: true}));
+assertCountScan(explain);
+// One shard key field uses $in rather than equality, so a sharding filter is still required.
+explain =
+ assert.commandWorked(coll.explain('executionStats').count({a: true, b: {$in: [true, false]}}));
+assertShardFilter(explain);
+
+// The same three cases, expressed as find() queries.
+explain = assert.commandWorked(coll.find({a: true}).explain());
+assertShardFilter(explain);
+explain = assert.commandWorked(coll.find({a: true, b: true}).explain());
+assertNoShardFilter(explain);
+explain = assert.commandWorked(coll.find({a: true, b: {$in: [true, false]}}).explain());
+assertShardFilter(explain);
+
+jsTest.log('Tests with hashed shard key');
+createCollection(coll, {a: 'hashed'});
+
+// Equality on the hashed shard key field (with or without an extra predicate on 'b') specifies
+// the full shard key, so a COUNT_SCAN is expected.
+explain = assert.commandWorked(coll.explain('executionStats').count({a: true}));
+assertCountScan(explain);
+explain = assert.commandWorked(coll.explain('executionStats').count({a: true, b: true}));
+assertCountScan(explain);
+// An $in predicate on the shard key, or a predicate only on a non-shard-key field, still
+// requires a sharding filter.
+explain = assert.commandWorked(coll.explain('executionStats').count({a: {$in: [true, false]}}));
+assertShardFilter(explain);
+explain = assert.commandWorked(coll.explain('executionStats').count({b: true}));
+assertShardFilter(explain);
+
+// The same four cases, expressed as find() queries.
+explain = assert.commandWorked(coll.find({a: true}).explain());
+assertNoShardFilter(explain);
+explain = assert.commandWorked(coll.find({a: true, b: true}).explain());
+assertNoShardFilter(explain);
+explain = assert.commandWorked(coll.find({a: {$in: [true, false]}}).explain());
+assertShardFilter(explain);
+explain = assert.commandWorked(coll.find({b: true}).explain());
+assertShardFilter(explain);
+
+// Stop the ShardingTest fixture started at the top of the test.
+st.stop();
+})();
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index a44eba9ef71..c2da4d5ac05 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -305,7 +305,21 @@ void fillOutPlannerParams(OperationContext* opCtx,
auto collDesc = CollectionShardingState::get(opCtx, canonicalQuery->nss())
->getCollectionDescription(opCtx);
if (collDesc.isSharded()) {
- plannerParams->shardKey = collDesc.getKeyPattern();
+ const auto& keyPattern = collDesc.getKeyPattern();
+ ShardKeyPattern shardKeyPattern(keyPattern);
+
+ // If the shard key is specified exactly, the query is guaranteed to only target one
+ // shard. Shards cannot own orphans for the key ranges they own, so there is no need
+ // to include a shard filtering stage. By omitting the shard filter, it may be possible
+ // to get a more efficient plan (for example, a COUNT_SCAN may be used if the query is
+ // eligible).
+ const BSONObj extractedKey = shardKeyPattern.extractShardKeyFromQuery(*canonicalQuery);
+
+ if (extractedKey.isEmpty()) {
+ plannerParams->shardKey = keyPattern;
+ } else {
+ plannerParams->options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
+ }
} else {
// If there's no metadata don't bother w/the shard filter since we won't know what
// the key pattern is anyway...