diff options
author | Sebastian Mayr <smayr@atlassian.com> | 2020-10-14 18:10:01 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-10-15 16:31:09 +0000 |
commit | 14bfbd8833706912617c0c904cce9847fbbbd0f1 (patch) | |
tree | e8876ba38ae5c0d79f458f100a4e684e68207b3e | |
parent | 4772fe5bd1a4e66c5cc8177cdb5956e05b07b176 (diff) | |
download | mongo-14bfbd8833706912617c0c904cce9847fbbbd0f1.tar.gz |
SERVER-39191 Optimize count queries on sharded collections
When counts were made orphan-aware, the addition of the SHARDING_FILTER
stage started preventing the COUNT_SCAN optimization on all sharded
collections, leading to significant performance loss on some workloads.
However, there cannot be any orphans if the shard key is fully
specified: such a query targets only the shard that owns the key, and a
mongod will never have orphans for chunk ranges it owns.
This patch changes the query system to omit the SHARDING_FILTER stage in cases
where the query specifies the full shard key, regaining lost performance.
Closes #1369.
Signed-off-by: Ian Boros <ian.boros@mongodb.com>
-rw-r--r-- | jstests/sharding/queries_elide_shard_filter.js | 108 | ||||
-rw-r--r-- | src/mongo/db/query/get_executor.cpp | 16 |
2 files changed, 123 insertions, 1 deletions
diff --git a/jstests/sharding/queries_elide_shard_filter.js b/jstests/sharding/queries_elide_shard_filter.js new file mode 100644 index 00000000000..ab0fbb5ecad --- /dev/null +++ b/jstests/sharding/queries_elide_shard_filter.js @@ -0,0 +1,108 @@ +// +// Tests that queries in sharded collections will be properly optimized. In particular, queries +// which specify the shard key fully with an equality predicate may omit a SHARDING_FILTER stage. +// +// @tags: [requires_fcv_49] + +(function() { +"use strict"; + +load("jstests/libs/analyze_plan.js"); + +function assertShardFilter(explain) { + const filterStage = getPlanStage(explain.queryPlanner.winningPlan, "SHARDING_FILTER"); + assert.eq(filterStage.stage, "SHARDING_FILTER"); + const scanStage = filterStage.inputStage; + assert.contains(scanStage.stage, ["IXSCAN", "FETCH"]); +} + +function assertNoShardFilter(explain) { + const filterStage = getPlanStage(explain.queryPlanner.winningPlan, "SHARDING_FILTER"); + assert.eq(filterStage, null, explain); +} + +function assertCountScan(explain) { + const countStage = getPlanStage(explain.queryPlanner.winningPlan, "COUNT_SCAN"); + assert.eq(countStage.stage, "COUNT_SCAN"); +} + +const st = new ShardingTest({shards: 1}); +const coll = st.s0.getCollection("foo.bar"); + +function createCollection(coll, shardKey) { + coll.drop(); + assert.commandWorked(st.s0.adminCommand({shardCollection: coll.getFullName(), key: shardKey})); + + assert.commandWorked(coll.insert({_id: true, a: true, b: true, c: true, d: true})); + assert.commandWorked(coll.createIndex({a: 1, b: 1})); + assert.commandWorked(coll.createIndex({b: 1, a: 1})); +} + +assert.commandWorked(st.s0.adminCommand({enableSharding: coll.getDB().getName()})); + +jsTest.log('Tests with single shard key'); +createCollection(coll, {a: 1}); + +// We're requesting a specific shard key, therefore we should optimize away SHARDING_FILTER +// and use a cheaper COUNT_SCAN. 
+let explain = assert.commandWorked(coll.explain('executionStats').count({a: true})); +assertCountScan(explain); +// Check this works with a subset of records as well. +explain = assert.commandWorked(coll.explain('executionStats').count({a: true, b: true})); +assertCountScan(explain); + +// Test that a find() query which specifies the entire shard key does not need a shard filter. +explain = assert.commandWorked(coll.find({a: true}).explain()); +assertNoShardFilter(explain); +explain = assert.commandWorked(coll.find({a: true, b: true}).explain()); +assertNoShardFilter(explain); + +// We're not checking shard key for equality, therefore need a sharding filter. +explain = assert.commandWorked(coll.explain('executionStats').count({a: {$in: [true, false]}})); +assertShardFilter(explain); + +// We're requesting a disjoint key from shardkey, therefore need a sharding filter. +explain = assert.commandWorked(coll.explain('executionStats').count({b: true})); +assertShardFilter(explain); + +jsTest.log('Tests with compound shard key'); +createCollection(coll, {a: 1, b: 1}); + +explain = assert.commandWorked(coll.explain('executionStats').count({a: true})); +assertShardFilter(explain); +explain = assert.commandWorked(coll.explain('executionStats').count({a: true, b: true})); +assertCountScan(explain); +explain = + assert.commandWorked(coll.explain('executionStats').count({a: true, b: {$in: [true, false]}})); +assertShardFilter(explain); + +explain = assert.commandWorked(coll.find({a: true}).explain()); +assertShardFilter(explain); +explain = assert.commandWorked(coll.find({a: true, b: true}).explain()); +assertNoShardFilter(explain); +explain = assert.commandWorked(coll.find({a: true, b: {$in: [true, false]}}).explain()); +assertShardFilter(explain); + +jsTest.log('Tests with hashed shard key'); +createCollection(coll, {a: 'hashed'}); + +explain = assert.commandWorked(coll.explain('executionStats').count({a: true})); +assertCountScan(explain); +explain = 
assert.commandWorked(coll.explain('executionStats').count({a: true, b: true})); +assertCountScan(explain); +explain = assert.commandWorked(coll.explain('executionStats').count({a: {$in: [true, false]}})); +assertShardFilter(explain); +explain = assert.commandWorked(coll.explain('executionStats').count({b: true})); +assertShardFilter(explain); + +explain = assert.commandWorked(coll.find({a: true}).explain()); +assertNoShardFilter(explain); +explain = assert.commandWorked(coll.find({a: true, b: true}).explain()); +assertNoShardFilter(explain); +explain = assert.commandWorked(coll.find({a: {$in: [true, false]}}).explain()); +assertShardFilter(explain); +explain = assert.commandWorked(coll.find({b: true}).explain()); +assertShardFilter(explain); + +st.stop(); +})(); diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index a44eba9ef71..c2da4d5ac05 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -305,7 +305,21 @@ void fillOutPlannerParams(OperationContext* opCtx, auto collDesc = CollectionShardingState::get(opCtx, canonicalQuery->nss()) ->getCollectionDescription(opCtx); if (collDesc.isSharded()) { - plannerParams->shardKey = collDesc.getKeyPattern(); + const auto& keyPattern = collDesc.getKeyPattern(); + ShardKeyPattern shardKeyPattern(keyPattern); + + // If the shard key is specified exactly, the query is guaranteed to only target one + // shard. Shards cannot own orphans for the key ranges they own, so there is no need + // to include a shard filtering stage. By omitting the shard filter, it may be possible + // to get a more efficient plan (for example, a COUNT_SCAN may be used if the query is + // eligible). 
+ const BSONObj extractedKey = shardKeyPattern.extractShardKeyFromQuery(*canonicalQuery); + + if (extractedKey.isEmpty()) { + plannerParams->shardKey = keyPattern; + } else { + plannerParams->options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER; + } } else { // If there's no metadata don't bother w/the shard filter since we won't know what // the key pattern is anyway... |