author     Cheahuychou Mao <mao.cheahuychou@gmail.com>       2022-12-15 18:52:00 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2022-12-16 11:31:40 +0000
commit     b7d1b4b8b5c2290d0faa6557cab3d8d08af2283c (patch)
tree       30980af51ca9613680165425af25e561296b3ba5
parent     49a208dc8950dfcc85ee65e85fb725be80dd4aba (diff)
download   mongo-b7d1b4b8b5c2290d0faa6557cab3d8d08af2283c.tar.gz
SERVER-72031 Support calculating read distribution metrics from sampled read queries
-rw-r--r--  src/mongo/db/s/SConscript                                          |   2
-rw-r--r--  src/mongo/db/s/analyze_shard_key_read_write_distribution.cpp       | 227
-rw-r--r--  src/mongo/db/s/analyze_shard_key_read_write_distribution.h         | 148
-rw-r--r--  src/mongo/db/s/analyze_shard_key_read_write_distribution_test.cpp  | 695
-rw-r--r--  src/mongo/s/analyze_shard_key_cmd.idl                              |  65
-rw-r--r--  src/mongo/s/chunk_manager.cpp                                      |   4
-rw-r--r--  src/mongo/s/chunk_manager.h                                        |   4
7 files changed, 1139 insertions, 6 deletions
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 6bf33492fad..9a91d8d232e 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -410,6 +410,7 @@ env.Library(
'add_shard_cmd.cpp',
'analyze_shard_key_cmd.cpp',
'analyze_shard_key_cmd_util.cpp',
+ 'analyze_shard_key_read_write_distribution.cpp',
'auto_split_vector_command.cpp',
'check_sharding_index_command.cpp',
'cleanup_orphaned_cmd.cpp',
@@ -664,6 +665,7 @@ env.CppUnitTest(
target='db_s_shard_server_test',
source=[
'active_migrations_registry_test.cpp',
+ 'analyze_shard_key_read_write_distribution_test.cpp',
'auto_split_vector_test.cpp',
'balancer/balance_stats_test.cpp',
'chunk_split_state_driver_test.cpp',
diff --git a/src/mongo/db/s/analyze_shard_key_read_write_distribution.cpp b/src/mongo/db/s/analyze_shard_key_read_write_distribution.cpp
new file mode 100644
index 00000000000..5c16e390bf0
--- /dev/null
+++ b/src/mongo/db/s/analyze_shard_key_read_write_distribution.cpp
@@ -0,0 +1,227 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/s/analyze_shard_key_read_write_distribution.h"
+
+#include "mongo/db/db_raii.h"
+#include "mongo/db/dbdirectclient.h"
+#include "mongo/db/query/collation/collation_index_key.h"
+#include "mongo/db/query/collation/collator_factory_interface.h"
+#include "mongo/db/query/internal_plans.h"
+#include "mongo/db/s/shard_key_index_util.h"
+#include "mongo/logv2/log.h"
+#include "mongo/s/cluster_commands_helpers.h"
+#include "mongo/s/grid.h"
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
+
+namespace mongo {
+namespace analyze_shard_key {
+
+namespace {
+
+/**
+ * Returns true if the given object contains any of the given field names, at any nesting depth.
+ */
+bool hasAnyFieldName(const BSONObj& obj, const std::set<StringData>& fieldNames) {
+    BSONObjIterator it(obj);
+    while (it.more()) {
+        auto e = it.next();
+        auto fieldName = e.fieldNameStringData();
+        if (fieldNames.find(fieldName) != fieldNames.end()) {
+            return true;
+        }
+        if (e.type() == Object) {
+            if (hasAnyFieldName(e.embeddedObject(), fieldNames)) {
+                return true;
+            }
+        }
+        if (e.type() == Array) {
+            for (const auto& innerE : e.Array()) {
+                if (innerE.type() == Object) {
+                    if (hasAnyFieldName(innerE.embeddedObject(), fieldNames)) {
+                        return true;
+                    }
+                }
+            }
+        }
+    }
+    return false;
+}
+
+/**
+ * Returns true if a query that specifies the given collation, run against a collection with the
+ * given default collator, uses the simple collation.
+ */
+bool hasSimpleCollation(const CollatorInterface* defaultCollator, const BSONObj& collation) {
+ if (collation.isEmpty()) {
+ return !defaultCollator;
+ }
+ return SimpleBSONObjComparator::kInstance.evaluate(collation == CollationSpec::kSimpleSpec);
+}
+
+/**
+ * Returns true if the given shard key contains any collatable fields (ones that can be affected in
+ * comparison or sort order by collation).
+ */
+bool shardKeyHasCollatableType(const ShardKeyPattern& shardKeyPattern, const BSONObj& shardKey) {
+ for (const BSONElement& elt : shardKey) {
+ if (CollationIndexKey::isCollatableType(elt.type())) {
+ return true;
+ }
+ if (shardKeyPattern.isHashedPattern() &&
+ shardKeyPattern.getHashedField().fieldNameStringData() == elt.fieldNameStringData()) {
+ // If the field is specified as "hashed" in the shard key pattern, then the hash value
+ // could have come from a collatable type.
+ return true;
+ }
+ }
+ return false;
+}
+
+} // namespace
+
+template <typename DistributionMetricsType, typename SampleSizeType>
+DistributionMetricsType
+DistributionMetricsCalculator<DistributionMetricsType, SampleSizeType>::_getMetrics() const {
+ DistributionMetricsType metrics(_getSampleSize());
+ if (metrics.getSampleSize().getTotal() > 0) {
+ metrics.setNumTargetedOneShard(_numTargetedOneShard);
+ metrics.setNumTargetedMultipleShards(_numTargetedMultipleShards);
+ metrics.setNumTargetedAllShards(_numTargetedAllShards);
+
+ std::vector<int64_t> numDispatchedByRange;
+ for (auto& [_, numDispatched] : _numDispatchedByRange) {
+ numDispatchedByRange.push_back(numDispatched);
+ }
+ metrics.setNumDispatchedByRange(numDispatchedByRange);
+ }
+ return metrics;
+}
+
+template <typename DistributionMetricsType, typename SampleSizeType>
+BSONObj
+DistributionMetricsCalculator<DistributionMetricsType, SampleSizeType>::_incrementMetricsForQuery(
+    OperationContext* opCtx,
+    const BSONObj& filter,
+    const BSONObj& collation,
+    const boost::optional<LegacyRuntimeConstants>& runtimeConstants,
+    const boost::optional<BSONObj>& letParameters) {
+    auto shardKey = uassertStatusOK(
+        _getShardKeyPattern().extractShardKeyFromQuery(opCtx, _targeter.getNS(), filter));
+
+    // Increment metrics about range targeting.
+    auto&& cif = [&]() {
+        if (collation.isEmpty()) {
+            return std::unique_ptr<CollatorInterface>{};
+        }
+        return uassertStatusOK(
+            CollatorFactoryInterface::get(opCtx->getServiceContext())->makeFromBSON(collation));
+    }();
+    auto expCtx = make_intrusive<ExpressionContext>(
+        opCtx, std::move(cif), _getChunkManager().getNss(), runtimeConstants, letParameters);
+
+    std::set<ShardId> shardIds;  // This is not used.
+    std::set<ChunkRange> chunkRanges;
+    _getChunkManager().getShardIdsForQuery(expCtx, filter, collation, &shardIds, &chunkRanges);
+    _incrementTargetedRanges(chunkRanges);
+
+    // Increment metrics about shard targeting.
+    if (!shardKey.isEmpty()) {
+        // This query filters by shard key equality. If the query has a simple collation or the
+        // shard key doesn't contain a collatable field, then it is guaranteed to target only one
+        // shard. Otherwise, the number of shards that it targets depends on how the shard key
+        // range is distributed among shards. Given this, pessimistically classify it as
+        // targeting multiple shards.
+        if (hasSimpleCollation(_getDefaultCollator(), collation) ||
+            !shardKeyHasCollatableType(_getShardKeyPattern(), shardKey)) {
+            _incrementTargetedOneShard();
+            invariant(chunkRanges.size() == 1U);
+        } else {
+            _incrementTargetedMultipleShards();
+        }
+    } else {
+        if (hasAnyFieldName(filter, {_firstShardKeyFieldName})) {
+            // This query filters by shard key range. Again, the number of shards that it targets
+            // depends on how the shard key range is distributed among shards. Given this,
+            // pessimistically classify it as targeting multiple shards.
+            _incrementTargetedMultipleShards();
+        } else {
+            // This query doesn't filter by the shard key at all, so it always targets all
+            // shards.
+            _incrementTargetedAllShards();
+            invariant((int)chunkRanges.size() == _getChunkManager().numChunks());
+        }
+    }
+
+    return shardKey;
+}
+
+ReadSampleSize ReadDistributionMetricsCalculator::_getSampleSize() const {
+ ReadSampleSize sampleSize;
+ sampleSize.setTotal(_numFind + _numAggregate + _numCount + _numDistinct);
+ sampleSize.setFind(_numFind);
+ sampleSize.setAggregate(_numAggregate);
+ sampleSize.setCount(_numCount);
+ sampleSize.setDistinct(_numDistinct);
+ return sampleSize;
+}
+
+ReadDistributionMetrics ReadDistributionMetricsCalculator::getMetrics() const {
+ return _getMetrics();
+}
+
+void ReadDistributionMetricsCalculator::addQuery(OperationContext* opCtx,
+ const SampledQueryDocument& doc) {
+ switch (doc.getCmdName()) {
+ case SampledCommandNameEnum::kFind:
+ _numFind++;
+ break;
+ case SampledCommandNameEnum::kAggregate:
+ _numAggregate++;
+ break;
+ case SampledCommandNameEnum::kCount:
+ _numCount++;
+ break;
+ case SampledCommandNameEnum::kDistinct:
+ _numDistinct++;
+ break;
+ default:
+ MONGO_UNREACHABLE;
+ }
+
+ auto cmd = SampledReadCommand::parse(IDLParserContext("ReadDistributionMetricsCalculator"),
+ doc.getCmd());
+ _incrementMetricsForQuery(opCtx, cmd.getFilter(), cmd.getCollation());
+}
+
+} // namespace analyze_shard_key
+} // namespace mongo
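
The calculator above is driven one sampled query at a time: the constructor seeds a dispatch counter per chunk range, each addQuery() call classifies one sampled read, and getMetrics() snapshots the totals. A minimal caller sketch (illustrative only, not part of this commit; it assumes the sampled query documents have already been fetched by the caller):

    // Sketch, not part of this commit; 'calculateReadDistribution' is a
    // hypothetical helper name.
    ReadDistributionMetrics calculateReadDistribution(
        OperationContext* opCtx,
        const CollectionRoutingInfoTargeter& targeter,
        const std::vector<SampledQueryDocument>& sampledQueryDocs) {
        ReadDistributionMetricsCalculator calculator(targeter);
        for (const auto& doc : sampledQueryDocs) {
            // Each call bumps the per-command sample size, exactly one of the
            // one-shard / multiple-shards / all-shards counters, and the
            // per-chunk-range dispatch counters.
            calculator.addQuery(opCtx, doc);
        }
        return calculator.getMetrics();
    }
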
diff --git a/src/mongo/db/s/analyze_shard_key_read_write_distribution.h b/src/mongo/db/s/analyze_shard_key_read_write_distribution.h
new file mode 100644
index 00000000000..47613e3a57e
--- /dev/null
+++ b/src/mongo/db/s/analyze_shard_key_read_write_distribution.h
@@ -0,0 +1,148 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/s/analyze_shard_key_cmd_gen.h"
+#include "mongo/s/analyze_shard_key_common_gen.h"
+#include "mongo/s/analyze_shard_key_documents_gen.h"
+#include "mongo/s/collection_routing_info_targeter.h"
+
+namespace mongo {
+namespace analyze_shard_key {
+
+/**
+ * Utility class for calculating read or write distribution metrics from sampled queries against
+ * the collection with the given routing info.
+ */
+template <typename DistributionMetricsType, typename SampleSizeType>
+class DistributionMetricsCalculator {
+public:
+ DistributionMetricsCalculator(const CollectionRoutingInfoTargeter& targeter)
+ : _targeter(targeter),
+ _firstShardKeyFieldName(
+ _getChunkManager().getShardKeyPattern().toBSON().firstElement().fieldName()) {
+ _getChunkManager().forEachChunk([&](const auto& chunk) {
+ _numDispatchedByRange.emplace(std::make_pair(chunk.getRange(), 0));
+ return true;
+ });
+ }
+
+ /**
+ * Calculates metrics for the given sampled query.
+ */
+ virtual void addQuery(OperationContext* opCtx, const SampledQueryDocument& doc) = 0;
+
+ /**
+ * Returns the metrics calculated based on the sampled queries added so far.
+ */
+ virtual DistributionMetricsType getMetrics() const = 0;
+
+protected:
+ virtual SampleSizeType _getSampleSize() const = 0;
+
+ DistributionMetricsType _getMetrics() const;
+
+ void _incrementTargetedOneShard() {
+ _numTargetedOneShard++;
+ }
+
+ void _incrementTargetedMultipleShards() {
+ _numTargetedMultipleShards++;
+ }
+
+ void _incrementTargetedAllShards() {
+ _numTargetedAllShards++;
+ }
+
+ void _incrementTargetedRanges(const std::set<ChunkRange>& chunkRanges) {
+ for (const auto& chunkRange : chunkRanges) {
+ auto it = _numDispatchedByRange.find(chunkRange);
+ invariant(it != _numDispatchedByRange.end());
+ it->second++;
+ }
+ }
+
+ /**
+ * The helper for 'addQuery'. Increments the metrics for the query with the given filter,
+ * collation, run-time constants and let parameters (the last two are only applicable to writes).
+ * If the query filters by shard key equality, returns the shard key value.
+ */
+ BSONObj _incrementMetricsForQuery(
+ OperationContext* opCtx,
+ const BSONObj& filter,
+ const BSONObj& collation,
+ const boost::optional<LegacyRuntimeConstants>& runtimeConstants = boost::none,
+ const boost::optional<BSONObj>& letParameters = boost::none);
+
+ const ChunkManager& _getChunkManager() const {
+ return _targeter.getRoutingInfo().cm;
+ }
+
+ const ShardKeyPattern& _getShardKeyPattern() const {
+ return _getChunkManager().getShardKeyPattern();
+ }
+
+ const CollatorInterface* _getDefaultCollator() const {
+ return _getChunkManager().getDefaultCollator();
+ }
+
+ const CollectionRoutingInfoTargeter& _targeter;
+ const StringData _firstShardKeyFieldName;
+
+ int64_t _numTargetedOneShard = 0;
+ int64_t _numTargetedMultipleShards = 0;
+ int64_t _numTargetedAllShards = 0;
+
+ std::map<ChunkRange, int64_t> _numDispatchedByRange;
+};
+
+class ReadDistributionMetricsCalculator
+ : public DistributionMetricsCalculator<ReadDistributionMetrics, ReadSampleSize> {
+public:
+ ReadDistributionMetricsCalculator(const CollectionRoutingInfoTargeter& targeter)
+ : DistributionMetricsCalculator(targeter) {}
+
+ void addQuery(OperationContext* opCtx, const SampledQueryDocument& doc) override;
+
+ ReadDistributionMetrics getMetrics() const override;
+
+private:
+ ReadSampleSize _getSampleSize() const override;
+
+ int64_t _numFind = 0;
+ int64_t _numAggregate = 0;
+ int64_t _numCount = 0;
+ int64_t _numDistinct = 0;
+};
+
+} // namespace analyze_shard_key
+} // namespace mongo
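
Only the read-side subclass is added in this commit, but the base class already threads through runtime constants and let parameters, which the comment above notes are only applicable to writes. A hypothetical write-side counterpart would slot into the same template as sketched below; WriteDistributionMetrics and the counter names are assumptions, not types from this commit (WriteSampleSize is added to the IDL further down):

    // Hypothetical write-side counterpart; not part of this commit.
    class WriteDistributionMetricsCalculator
        : public DistributionMetricsCalculator<WriteDistributionMetrics, WriteSampleSize> {
    public:
        WriteDistributionMetricsCalculator(const CollectionRoutingInfoTargeter& targeter)
            : DistributionMetricsCalculator(targeter) {}

        void addQuery(OperationContext* opCtx, const SampledQueryDocument& doc) override;

        WriteDistributionMetrics getMetrics() const override;

    private:
        WriteSampleSize _getSampleSize() const override;

        // One counter per sampled write command, mirroring the WriteSampleSize fields.
        int64_t _numUpdate = 0;
        int64_t _numDelete = 0;
        int64_t _numFindAndModify = 0;
    };
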
diff --git a/src/mongo/db/s/analyze_shard_key_read_write_distribution_test.cpp b/src/mongo/db/s/analyze_shard_key_read_write_distribution_test.cpp
new file mode 100644
index 00000000000..641334268e5
--- /dev/null
+++ b/src/mongo/db/s/analyze_shard_key_read_write_distribution_test.cpp
@@ -0,0 +1,695 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/s/analyze_shard_key_read_write_distribution.h"
+
+#include "mongo/db/hasher.h"
+#include "mongo/db/query/collation/collator_factory_interface.h"
+#include "mongo/db/query/collation/collator_interface_mock.h"
+#include "mongo/db/s/shard_server_test_fixture.h"
+#include "mongo/idl/server_parameter_test_util.h"
+#include "mongo/logv2/log.h"
+#include "mongo/s/analyze_shard_key_documents_gen.h"
+#include "mongo/unittest/death_test.h"
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
+
+namespace mongo {
+namespace analyze_shard_key {
+namespace {
+
+const auto kSampledReadCommandNames =
+ std::vector<SampledCommandNameEnum>{SampledCommandNameEnum::kFind,
+ SampledCommandNameEnum::kAggregate,
+ SampledCommandNameEnum::kCount,
+ SampledCommandNameEnum::kDistinct};
+
+struct ReadTargetMetricsBundle {
+ int64_t numTargetedOneShard = 0;
+ int64_t numTargetedMultipleShards = 0;
+ int64_t numTargetedAllShards = 0;
+ std::vector<int64_t> numDispatchedByRange;
+};
+
+struct ChunkSplitInfo {
+ ShardKeyPattern shardKeyPattern;
+ std::vector<BSONObj> splitPoints;
+};
+
+struct ReadWriteDistributionTest : public ShardServerTestFixture {
+protected:
+ /**
+ * Returns a CollectionRoutingInfoTargeter with the given shard key pattern, split points and
+ * default collator.
+ */
+ CollectionRoutingInfoTargeter makeCollectionRoutingInfoTargeter(
+ const ChunkSplitInfo& chunkSplitInfo,
+ std::unique_ptr<CollatorInterface> defaultCollator = nullptr) const {
+ auto splitPointsIncludingEnds(chunkSplitInfo.splitPoints);
+ splitPointsIncludingEnds.insert(splitPointsIncludingEnds.begin(),
+ chunkSplitInfo.shardKeyPattern.getKeyPattern().globalMin());
+ splitPointsIncludingEnds.push_back(
+ chunkSplitInfo.shardKeyPattern.getKeyPattern().globalMax());
+
+ const Timestamp timestamp{Timestamp(100, 1)};
+ ChunkVersion version({OID::gen(), timestamp}, {1, 0});
+
+ std::vector<ChunkType> chunks;
+ for (size_t i = 1; i < splitPointsIncludingEnds.size(); ++i) {
+ ChunkType chunk(collUuid,
+ {chunkSplitInfo.shardKeyPattern.getKeyPattern().extendRangeBound(
+ splitPointsIncludingEnds[i - 1], false),
+ chunkSplitInfo.shardKeyPattern.getKeyPattern().extendRangeBound(
+ splitPointsIncludingEnds[i], false)},
+ version,
+ ShardId{str::stream() << (i - 1)});
+ chunk.setName(OID::gen());
+
+ chunks.push_back(chunk);
+ version.incMajor();
+ }
+
+ auto routingTableHistory =
+ RoutingTableHistory::makeNew(nss,
+ collUuid,
+ chunkSplitInfo.shardKeyPattern.getKeyPattern(),
+ std::move(defaultCollator) /* collator */,
+ false /* unique */,
+ OID::gen(),
+ timestamp,
+ boost::none /* timeseriesFields */,
+ boost::none /* reshardingFields */,
+ boost::none /* maxChunkSizeBytes */,
+ true /* allowMigrations */,
+ chunks);
+
+ auto cm = ChunkManager(ShardId("dummyPrimaryShard"),
+ DatabaseVersion(UUID::gen(), timestamp),
+ RoutingTableHistoryValueHandle(std::make_shared<RoutingTableHistory>(
+ std::move(routingTableHistory))),
+ boost::none);
+ return CollectionRoutingInfoTargeter(
+ CollectionRoutingInfo{std::move(cm), boost::optional<GlobalIndexesCache>(boost::none)});
+ }
+
+ SampledCommandNameEnum getRandomSampledReadCommandName() const {
+ return kSampledReadCommandNames[std::rand() % kSampledReadCommandNames.size()];
+ }
+
+ SampledQueryDocument makeSampledReadQueryDocument(SampledCommandNameEnum cmdName,
+ const BSONObj& filter,
+ const BSONObj& collation = BSONObj()) const {
+ auto cmd = SampledReadCommand{filter, collation};
+ return {UUID::gen(), nss, collUuid, cmdName, cmd.toBSON()};
+ }
+
+ SampledQueryDocument makeSampledUpdateQueryDocument(
+ const std::vector<write_ops::UpdateOpEntry>& updateOps) const {
+ write_ops::UpdateCommandRequest cmd(nss);
+ cmd.setUpdates(updateOps);
+ return {UUID::gen(),
+ nss,
+ collUuid,
+ SampledCommandNameEnum::kUpdate,
+ cmd.toBSON(BSON("$db" << nss.db().toString()))};
+ }
+
+ SampledQueryDocument makeSampledDeleteQueryDocument(
+ const std::vector<write_ops::DeleteOpEntry>& deleteOps) const {
+ write_ops::DeleteCommandRequest cmd(nss);
+ cmd.setDeletes(deleteOps);
+ return {UUID::gen(),
+ nss,
+ collUuid,
+ SampledCommandNameEnum::kDelete,
+ cmd.toBSON(BSON("$db" << nss.db().toString()))};
+ }
+
+ SampledQueryDocument makeSampledFindAndModifyQueryDocument(
+ const BSONObj& filter,
+ const write_ops::UpdateModification& update,
+ bool upsert,
+ bool remove,
+ const BSONObj& collation = BSONObj()) const {
+ write_ops::FindAndModifyCommandRequest cmd(nss);
+ cmd.setQuery(filter);
+ cmd.setUpdate(update);
+ cmd.setUpsert(upsert);
+ cmd.setRemove(remove);
+ cmd.setCollation(collation);
+ return {UUID::gen(),
+ nss,
+ collUuid,
+ SampledCommandNameEnum::kFindAndModify,
+ cmd.toBSON(BSON("$db" << nss.db().toString()))};
+ }
+
+ void assertTargetMetricsForReadQuery(const CollectionRoutingInfoTargeter& targeter,
+ const SampledQueryDocument& queryDoc,
+ const ReadTargetMetricsBundle& expectedMetrics) const {
+ ReadDistributionMetricsCalculator readDistributionCalculator(targeter);
+ readDistributionCalculator.addQuery(operationContext(), queryDoc);
+
+ auto metrics = readDistributionCalculator.getMetrics();
+ ASSERT_EQ(*metrics.getNumTargetedOneShard(), expectedMetrics.numTargetedOneShard);
+ ASSERT_EQ(*metrics.getNumTargetedMultipleShards(),
+ expectedMetrics.numTargetedMultipleShards);
+ ASSERT_EQ(*metrics.getNumTargetedAllShards(), expectedMetrics.numTargetedAllShards);
+ ASSERT_EQ(*metrics.getNumDispatchedByRange(), expectedMetrics.numDispatchedByRange);
+ }
+
+ const NamespaceString nss{"testDb", "testColl"};
+ const UUID collUuid = UUID::gen();
+
+ // Define two sets of ChunkSplitInfos for testing.
+
+ // 'chunkSplitInfoRangeSharding0' and 'chunkSplitInfoHashedSharding0' make the collection have
+ // chunks for the following shard key ranges:
+ // {a.x: MinKey, b.y: MinKey} -> {a.x: -100, b.y: "A"}
+ // {a.x: -100, b.y: "A"} -> {a.x: 100, b.y: "A"}
+ // {a.x: 100, b.y: "A"} -> {a.x: MaxKey, b.y: MaxKey}
+ const std::vector<BSONObj> splitPoints0 = {BSON("a.x" << -100 << "b.y"
+ << "A"),
+ BSON("a.x" << 100 << "b.y"
+ << "A")};
+ const ChunkSplitInfo chunkSplitInfoRangeSharding0{
+ ShardKeyPattern{BSON("a.x" << 1 << "b.y" << 1)}, splitPoints0};
+ const ChunkSplitInfo chunkSplitInfoHashedSharding0{
+ ShardKeyPattern{BSON("a.x" << 1 << "b.y"
+ << "hashed")},
+ std::vector<BSONObj>{
+ BSON("a.x" << splitPoints0[0]["a.x"].Int() << "b.y"
+ << BSONElementHasher::hash64(splitPoints0[0]["b.y"],
+ BSONElementHasher::DEFAULT_HASH_SEED)),
+ BSON("a.x" << splitPoints0[1]["a.x"].Int() << "b.y"
+ << BSONElementHasher::hash64(splitPoints0[1]["b.y"],
+ BSONElementHasher::DEFAULT_HASH_SEED))}};
+
+ // 'chunkSplitInfoRangeSharding1' and 'chunkSplitInfoHashedSharding1' make the collection
+ // have chunks for the following shard key ranges:
+ // {a: MinKey, b.y: MinKey} -> {a: {x: -100}, b.y: "A"}
+ // {a: {x: -100}, b.y: "A"} -> {a: {x: 100}, b.y: "A"}
+ // {a: {x: 100}, b.y: "A"} -> {a: MaxKey, b.y: MaxKey}
+ const std::vector<BSONObj> splitPoints1 = {BSON("a" << BSON("x" << -100) << "b.y"
+ << "A"),
+ BSON("a" << BSON("x" << 100) << "b.y"
+ << "A")};
+ const ChunkSplitInfo chunkSplitInfoRangeSharding1{ShardKeyPattern{BSON("a" << 1 << "b.y" << 1)},
+ splitPoints1};
+ const ChunkSplitInfo chunkSplitInfoHashedSharding1{
+ ShardKeyPattern{BSON("a" << 1 << "b.y"
+ << "hashed")},
+ std::vector<BSONObj>{
+ BSON("a" << splitPoints1[0]["a"].wrap() << "b.y"
+ << BSONElementHasher::hash64(splitPoints1[0]["b.y"],
+ BSONElementHasher::DEFAULT_HASH_SEED)),
+ BSON("a" << splitPoints1[1]["a"].wrap() << "b.y"
+ << BSONElementHasher::hash64(splitPoints1[1]["b.y"],
+ BSONElementHasher::DEFAULT_HASH_SEED))}};
+
+ const BSONObj emptyCollation = {};
+ const BSONObj simpleCollation = CollationSpec::kSimpleSpec;
+ // With a case-insensitive collation, a collatable point query involving a chunk bound
+ // can touch more than one chunk.
+ const BSONObj caseInsensitiveCollation =
+ BSON(Collation::kLocaleFieldName << "en_US"
+ << "strength" << 1 << "caseLevel" << false);
+};
+
+TEST_F(ReadWriteDistributionTest, ReadDistributionNoQueries) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ ReadDistributionMetricsCalculator readDistributionCalculator(targeter);
+ auto metrics = readDistributionCalculator.getMetrics();
+
+ auto sampleSize = metrics.getSampleSize();
+ ASSERT_EQ(sampleSize.getTotal(), 0);
+ ASSERT_EQ(sampleSize.getFind(), 0);
+ ASSERT_EQ(sampleSize.getAggregate(), 0);
+ ASSERT_EQ(sampleSize.getCount(), 0);
+ ASSERT_EQ(sampleSize.getDistinct(), 0);
+
+ ASSERT_FALSE(metrics.getNumTargetedOneShard());
+ ASSERT_FALSE(metrics.getNumTargetedMultipleShards());
+ ASSERT_FALSE(metrics.getNumTargetedAllShards());
+ ASSERT_FALSE(metrics.getNumDispatchedByRange());
+}
+
+TEST_F(ReadWriteDistributionTest, ReadDistributionSampleSize) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ ReadDistributionMetricsCalculator readDistributionCalculator(targeter);
+
+ // Add one find query.
+ auto filter0 = BSON("a.x" << 0);
+ readDistributionCalculator.addQuery(
+ operationContext(), makeSampledReadQueryDocument(SampledCommandNameEnum::kFind, filter0));
+
+ // Add two aggregate queries.
+ auto filter1 = BSON("a.x" << 1);
+ readDistributionCalculator.addQuery(
+ operationContext(),
+ makeSampledReadQueryDocument(SampledCommandNameEnum::kAggregate, filter1));
+ auto filter2 = BSON("a.x" << 2);
+ readDistributionCalculator.addQuery(
+ operationContext(),
+ makeSampledReadQueryDocument(SampledCommandNameEnum::kAggregate, filter2));
+
+ // Add three count queries.
+ auto filter3 = BSON("a.x" << 3);
+ readDistributionCalculator.addQuery(
+ operationContext(), makeSampledReadQueryDocument(SampledCommandNameEnum::kCount, filter3));
+ auto filter4 = BSON("a.x" << 4);
+ readDistributionCalculator.addQuery(
+ operationContext(), makeSampledReadQueryDocument(SampledCommandNameEnum::kCount, filter4));
+ auto filter5 = BSON("a.x" << 5);
+ readDistributionCalculator.addQuery(
+ operationContext(), makeSampledReadQueryDocument(SampledCommandNameEnum::kCount, filter5));
+
+ // Add one distinct query.
+ auto filter6 = BSON("a.x" << 6);
+ readDistributionCalculator.addQuery(
+ operationContext(),
+ makeSampledReadQueryDocument(SampledCommandNameEnum::kDistinct, filter6));
+
+ auto metrics = readDistributionCalculator.getMetrics();
+ auto sampleSize = metrics.getSampleSize();
+ ASSERT_EQ(sampleSize.getTotal(), 7);
+ ASSERT_EQ(sampleSize.getFind(), 1);
+ ASSERT_EQ(sampleSize.getAggregate(), 2);
+ ASSERT_EQ(sampleSize.getCount(), 3);
+ ASSERT_EQ(sampleSize.getDistinct(), 1);
+}
+
+DEATH_TEST_F(ReadWriteDistributionTest, ReadDistributionCannotAddUpdateQuery, "invariant") {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ ReadDistributionMetricsCalculator readDistributionCalculator(targeter);
+
+ auto filter = BSON("a.x" << 0);
+ auto updateOp = write_ops::UpdateOpEntry(
+ filter, write_ops::UpdateModification(BSON("$set" << BSON("c" << 0))));
+ readDistributionCalculator.addQuery(operationContext(),
+ makeSampledUpdateQueryDocument({updateOp}));
+}
+
+DEATH_TEST_F(ReadWriteDistributionTest, ReadDistributionCannotAddDeleteQuery, "invariant") {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ ReadDistributionMetricsCalculator readDistributionCalculator(targeter);
+
+ auto filter = BSON("a.x" << 0);
+ auto deleteOp = write_ops::DeleteOpEntry(filter, false /* multi */);
+ readDistributionCalculator.addQuery(operationContext(),
+ makeSampledDeleteQueryDocument({deleteOp}));
+}
+
+DEATH_TEST_F(ReadWriteDistributionTest, ReadDistributionCannotAddFindAndModifyQuery, "invariant") {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ ReadDistributionMetricsCalculator readDistributionCalculator(targeter);
+
+ auto filter = BSON("a.x" << 0);
+ auto updateMod = write_ops::UpdateModification(BSON("$set" << BSON("c" << 0)));
+ readDistributionCalculator.addQuery(
+ operationContext(),
+ makeSampledFindAndModifyQueryDocument(
+ filter, updateMod, false /* upsert */, false /* remove */));
+}
+
+class ReadDistributionFilterByShardKeyEqualityTest : public ReadWriteDistributionTest {
+protected:
+ void assertTargetMetrics(const CollectionRoutingInfoTargeter& targeter,
+ const SampledQueryDocument& queryDoc,
+ const std::vector<int64_t>& numDispatchedByRange,
+ bool hasSimpleCollation,
+ bool hasCollatableType) const {
+ ReadTargetMetricsBundle metrics;
+ if (hasSimpleCollation || !hasCollatableType) {
+ metrics.numTargetedOneShard = 1;
+ } else {
+ metrics.numTargetedMultipleShards = 1;
+ }
+ metrics.numDispatchedByRange = numDispatchedByRange;
+ assertTargetMetricsForReadQuery(targeter, queryDoc, metrics);
+ }
+};
+
+TEST_F(ReadDistributionFilterByShardKeyEqualityTest, ShardKeyEqualityOrdered) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ auto filters = std::vector<BSONObj>{BSON("a.x" << -100 << "b.y"
+ << "A"),
+ BSON("a" << BSON("x" << 0) << "b.y"
+ << "A"),
+ BSON("a" << BSON("x" << 0) << "b"
+ << BSON("y"
+ << "A")),
+ BSON("a" << BSON("x" << 0) << "b"
+ << BSON("y"
+ << "A"))};
+ auto numDispatchedByRange = std::vector<int64_t>({0, 1, 0});
+ auto hasSimpleCollation = true;
+ auto hasCollatableType = true;
+
+ for (const auto& filter : filters) {
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+ }
+}
+
+TEST_F(ReadDistributionFilterByShardKeyEqualityTest, ShardKeyEqualityNotOrdered) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ auto filter = BSON("b.y"
+ << "A"
+ << "a.x" << 0);
+ auto numDispatchedByRange = std::vector<int64_t>({0, 1, 0});
+ auto hasSimpleCollation = true;
+ auto hasCollatableType = true;
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+}
+
+TEST_F(ReadDistributionFilterByShardKeyEqualityTest, ShardKeyEqualityAdditionalFields) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ auto filter = BSON("_id" << 0 << "a.x" << 100 << "b.y"
+ << "A");
+ auto numDispatchedByRange = std::vector<int64_t>({0, 0, 1});
+ auto hasSimpleCollation = true;
+ auto hasCollatableType = true;
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+}
+
+TEST_F(ReadDistributionFilterByShardKeyEqualityTest,
+ ShardKeyEqualityNonSimpleCollation_ShardKeyContainsCollatableFields) {
+ auto hasSimpleCollation = false;
+ auto hasCollatableType = true;
+
+ auto assertMetrics = [&](const ChunkSplitInfo& chunkSplitInfo,
+ const BSONObj& filter,
+ const std::vector<int64_t>& numDispatchedByRange) {
+ // The collection has a non-simple default collation and the query specifies an empty
+ // collation.
+ auto targeter0 = makeCollectionRoutingInfoTargeter(
+ chunkSplitInfo,
+ uassertStatusOK(CollatorFactoryInterface::get(getServiceContext())
+ ->makeFromBSON(caseInsensitiveCollation)));
+ assertTargetMetrics(
+ targeter0,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter, emptyCollation),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+
+ // The collection has a simple default collation and the query specifies a non-simple
+ // collation.
+ auto targeter1 = makeCollectionRoutingInfoTargeter(
+ chunkSplitInfo,
+ uassertStatusOK(
+ CollatorFactoryInterface::get(getServiceContext())->makeFromBSON(simpleCollation)));
+ assertTargetMetrics(targeter1,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(),
+ filter,
+ caseInsensitiveCollation),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+
+ // The collection doesn't have a default collation and the query specifies a non-simple
+ // collation.
+ auto targeter2 = makeCollectionRoutingInfoTargeter(chunkSplitInfo);
+ assertTargetMetrics(targeter2,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(),
+ filter,
+ caseInsensitiveCollation),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+ };
+
+ auto filter = BSON("a.x" << -100 << "b.y"
+ << "A");
+ auto numDispatchedByRange = std::vector<int64_t>({1, 1, 0});
+ assertMetrics(chunkSplitInfoRangeSharding0, filter, numDispatchedByRange);
+ assertMetrics(chunkSplitInfoHashedSharding0, filter, numDispatchedByRange);
+}
+
+TEST_F(ReadDistributionFilterByShardKeyEqualityTest,
+ ShardKeyEqualityNonSimpleCollation_ShardKeyDoesNotContainCollatableFields) {
+ auto filter = BSON("a.x" << -100 << "b.y" << 0);
+ auto numDispatchedByRange = std::vector<int64_t>({1, 0, 0});
+ auto hasSimpleCollation = false;
+ auto hasCollatableType = false;
+
+ // The collection has a non-simple default collation and the query specifies an empty
+ // collation.
+ auto targeter0 = makeCollectionRoutingInfoTargeter(
+ chunkSplitInfoRangeSharding0,
+ uassertStatusOK(CollatorFactoryInterface::get(getServiceContext())
+ ->makeFromBSON(caseInsensitiveCollation)));
+ assertTargetMetrics(
+ targeter0,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter, emptyCollation),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+
+ // The collection has a simple default collation and the query specifies a non-simple
+ // collation.
+ auto targeter1 = makeCollectionRoutingInfoTargeter(
+ chunkSplitInfoRangeSharding0,
+ uassertStatusOK(
+ CollatorFactoryInterface::get(getServiceContext())->makeFromBSON(simpleCollation)));
+ assertTargetMetrics(targeter1,
+ makeSampledReadQueryDocument(
+ getRandomSampledReadCommandName(), filter, caseInsensitiveCollation),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+
+ // The collection doesn't have a default collation and the query specifies a non-simple
+ // collation.
+ auto targeter2 = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ assertTargetMetrics(targeter2,
+ makeSampledReadQueryDocument(
+ getRandomSampledReadCommandName(), filter, caseInsensitiveCollation),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+}
+
+TEST_F(ReadDistributionFilterByShardKeyEqualityTest, ShardKeyEqualitySimpleCollation) {
+ auto filter = BSON("a.x" << -100 << "b.y"
+ << "A");
+ auto numDispatchedByRange = std::vector<int64_t>({0, 1, 0});
+ auto hasSimpleCollation = true;
+ auto hasCollatableType = true;
+
+ // The collection has a simple default collation and the query specifies an empty collation.
+ auto targeter0 = makeCollectionRoutingInfoTargeter(
+ chunkSplitInfoRangeSharding0,
+ uassertStatusOK(
+ CollatorFactoryInterface::get(getServiceContext())->makeFromBSON(simpleCollation)));
+ assertTargetMetrics(
+ targeter0,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter, emptyCollation),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+
+ // The collection has a non-simple default collation and the query specifies a simple
+ // collation.
+ auto targeter1 = makeCollectionRoutingInfoTargeter(
+ chunkSplitInfoRangeSharding0,
+ uassertStatusOK(CollatorFactoryInterface::get(getServiceContext())
+ ->makeFromBSON(caseInsensitiveCollation)));
+ assertTargetMetrics(
+ targeter1,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter, simpleCollation),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+
+ // The collection doesn't have a default collation and the query specifies a simple
+ // collation.
+ auto targeter2 = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ assertTargetMetrics(
+ targeter2,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter, simpleCollation),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+
+ // The collection doesn't have a default collation and the query specifies an empty
+ // collation.
+ auto targeter3 = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ assertTargetMetrics(
+ targeter3,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter, emptyCollation),
+ numDispatchedByRange,
+ hasSimpleCollation,
+ hasCollatableType);
+}
+
+class ReadDistributionFilterByShardKeyRangeTest : public ReadWriteDistributionTest {
+protected:
+ void assertTargetMetrics(const CollectionRoutingInfoTargeter& targeter,
+ const SampledQueryDocument& queryDoc,
+ const std::vector<int64_t>& numDispatchedByRange) const {
+ ReadTargetMetricsBundle metrics;
+ metrics.numTargetedMultipleShards = 1;
+ metrics.numDispatchedByRange = numDispatchedByRange;
+ assertTargetMetricsForReadQuery(targeter, queryDoc, metrics);
+ }
+};
+
+TEST_F(ReadDistributionFilterByShardKeyRangeTest, ShardKeyPrefixEqualityDotted) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ auto filter = BSON("a.x" << 0);
+ auto numDispatchedByRange = std::vector<int64_t>({0, 1, 0});
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter),
+ numDispatchedByRange);
+}
+
+TEST_F(ReadDistributionFilterByShardKeyRangeTest, ShardKeyPrefixEqualityNotDotted) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding1);
+ auto filter = BSON("a" << BSON("x" << 0));
+ auto numDispatchedByRange = std::vector<int64_t>({0, 1, 0});
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter),
+ numDispatchedByRange);
+}
+
+TEST_F(ReadDistributionFilterByShardKeyRangeTest, ShardKeyPrefixRangeMinKey) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ auto filter = BSON("a.x" << BSON("$lt" << 1));
+ auto numDispatchedByRange = std::vector<int64_t>({1, 1, 0});
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter),
+ numDispatchedByRange);
+}
+
+TEST_F(ReadDistributionFilterByShardKeyRangeTest, ShardKeyPrefixRangeMaxKey) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ auto filter = BSON("a.x" << BSON("$gte" << 2));
+ auto numDispatchedByRange = std::vector<int64_t>({0, 1, 1});
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter),
+ numDispatchedByRange);
+}
+
+TEST_F(ReadDistributionFilterByShardKeyRangeTest, ShardKeyPrefixRangeNoMinOrMaxKey) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ auto filter = BSON("a.x" << BSON("$gte" << -3 << "$lt" << 3));
+ auto numDispatchedByRange = std::vector<int64_t>({0, 1, 0});
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter),
+ numDispatchedByRange);
+}
+
+TEST_F(ReadDistributionFilterByShardKeyRangeTest, FullShardKeyRange) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ auto filter = BSON("a.x" << BSON("$gte" << -4 << "$lt" << 4) << "b.y"
+ << BSON("$gte"
+ << "A"
+ << "$lt"
+ << "Z"));
+ auto numDispatchedByRange = std::vector<int64_t>({0, 1, 0});
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter),
+ numDispatchedByRange);
+}
+
+TEST_F(ReadDistributionFilterByShardKeyRangeTest, ShardKeyNonEquality) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ auto filter = BSON("a.x" << BSON("$ne" << 5));
+ auto numDispatchedByRange = std::vector<int64_t>({1, 1, 1});
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter),
+ numDispatchedByRange);
+}
+
+class ReadDistributionNotFilterByShardKeyTest : public ReadWriteDistributionTest {
+protected:
+ void assertTargetMetrics(const CollectionRoutingInfoTargeter& targeter,
+ const SampledQueryDocument& queryDoc) const {
+ ReadTargetMetricsBundle metrics;
+ metrics.numTargetedAllShards = 1;
+ metrics.numDispatchedByRange = std::vector<int64_t>({1, 1, 1});
+ assertTargetMetricsForReadQuery(targeter, queryDoc, metrics);
+ }
+};
+
+TEST_F(ReadDistributionNotFilterByShardKeyTest, ShardKeySuffixEquality) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ auto filter = BSON("b.y"
+ << "A");
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter));
+}
+
+TEST_F(ReadDistributionNotFilterByShardKeyTest, NoShardKey) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ auto filter = BSON("_id" << 1);
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter));
+}
+
+TEST_F(ReadDistributionNotFilterByShardKeyTest, ShardKeyPrefixEqualityNotDotted) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding0);
+ // This query filters by "a" (exact match) not "a.x" (the shard key prefix). Currently, this
+ // is handled as a query not filtering by the shard key. As a result, it still targets all
+ // the chunks although it only matches the data in the chunk {a.x: -100, b.y: "A"} -> {a.x:
+ // 100, b.y: "A"}. Please see
+ // ReadDistributionFilterByShardKeyRangeTest/ShardKeyPrefixEqualityDotted for the case where
+ // the query filters by "a.x".
+ auto filter = BSON("a" << BSON("x" << 0));
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter));
+}
+
+TEST_F(ReadDistributionNotFilterByShardKeyTest, ShardKeyPrefixEqualityDotted) {
+ auto targeter = makeCollectionRoutingInfoTargeter(chunkSplitInfoRangeSharding1);
+ // This query filters by "a.x" not "a" (the shard key prefix). Currently, this is handled as
+ // a query not filtering by the shard key. As a result, it still targets all the chunks
+ // although it only matches the data in the chunk {a: {x: -100}, b.y: "A"} -> {a: {x: 100},
+ // b.y: "A"}.
+ auto filter = BSON("a.x" << 0);
+ assertTargetMetrics(targeter,
+ makeSampledReadQueryDocument(getRandomSampledReadCommandName(), filter));
+}
+
+} // namespace
+} // namespace analyze_shard_key
+} // namespace mongo
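
To make the assertion vectors in these tests concrete: chunkSplitInfoRangeSharding0 produces three chunks, so every numDispatchedByRange vector has three entries, ordered from MinKey to MaxKey. A worked example, with values taken from the tests above:

    // Chunk ranges for chunkSplitInfoRangeSharding0, in order:
    //   range 0: {a.x: MinKey, b.y: MinKey} -> {a.x: -100, b.y: "A"}
    //   range 1: {a.x: -100, b.y: "A"}      -> {a.x: 100, b.y: "A"}
    //   range 2: {a.x: 100, b.y: "A"}       -> {a.x: MaxKey, b.y: MaxKey}
    //
    // A point query on {a.x: 0} falls only in range 1:
    auto numDispatchedByRange = std::vector<int64_t>({0, 1, 0});
    // whereas a range query on {a.x: {$lt: 1}} spans ranges 0 and 1, giving {1, 1, 0}.
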
diff --git a/src/mongo/s/analyze_shard_key_cmd.idl b/src/mongo/s/analyze_shard_key_cmd.idl
index 8105f23d5a0..9e681232857 100644
--- a/src/mongo/s/analyze_shard_key_cmd.idl
+++ b/src/mongo/s/analyze_shard_key_cmd.idl
@@ -96,6 +96,71 @@ structs:
                validator: { gte: 0 }
                optional: true
+    ReadSampleSize:
+        description: "The number of sampled read queries by command name."
+        strict: false
+        fields:
+            total:
+                type: long
+                default: 0
+            find:
+                type: long
+                default: 0
+            aggregate:
+                type: long
+                default: 0
+            count:
+                type: long
+                default: 0
+            distinct:
+                type: long
+                default: 0
+
+    WriteSampleSize:
+        description: "The number of sampled write queries by command name."
+        strict: false
+        fields:
+            total:
+                type: long
+                default: 0
+            update:
+                type: long
+                default: 0
+            delete:
+                type: long
+                default: 0
+            findAndModify:
+                type: long
+                default: 0
+
+    ReadDistributionMetrics:
+        description: "The metrics about the read distribution calculated using sampled read queries."
+        strict: true
+        fields:
+            sampleSize:
+                type: ReadSampleSize
+            numReadsTargetedOneShard:
+                type: long
+                cpp_name: numTargetedOneShard
+                validator: { gte: 0 }
+                optional: true
+            numReadsTargetedMultipleShards:
+                type: long
+                cpp_name: numTargetedMultipleShards
+                validator: { gte: 0 }
+                optional: true
+            numReadsTargetedAllShards:
+                type: long
+                cpp_name: numTargetedAllShards
+                validator: { gte: 0 }
+                optional: true
+            numDispatchedReadsByRange:
+                type: array<long>
+                cpp_name: numDispatchedByRange
+                description: "The number of dispatched read requests for each chunk range sorted from
+                              MinKey to MaxKey."
+                optional: true
+
    analyzeShardKeyResponse:
        description: "The response for the 'analyzeShardKey' command."
        strict: false
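
For reference, a ReadDistributionMetrics document serialized per the IDL above would have the following shape, built here with BSON macros purely for illustration (the field names come from the IDL; the numbers are invented):

    // Illustrative only; not a real response from this commit.
    BSONObj readDistribution =
        BSON("sampleSize" << BSON("total" << 7LL << "find" << 1LL << "aggregate" << 2LL
                                          << "count" << 3LL << "distinct" << 1LL)
                          << "numReadsTargetedOneShard" << 5LL
                          << "numReadsTargetedMultipleShards" << 1LL
                          << "numReadsTargetedAllShards" << 1LL
                          << "numDispatchedReadsByRange" << BSON_ARRAY(2LL << 5LL << 3LL));
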
diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp
index 44278b34b86..152c2309eb8 100644
--- a/src/mongo/s/chunk_manager.cpp
+++ b/src/mongo/s/chunk_manager.cpp
@@ -501,10 +501,6 @@ void ChunkManager::getShardIdsForRange(const BSONObj& min,
const BSONObj& max,
std::set<ShardId>* shardIds,
std::set<ChunkRange>* chunkRanges) const {
- if (chunkRanges) {
- invariant(chunkRanges->empty());
- }
-
// If our range is [MinKey, MaxKey], we can simply return all shard ids right away. However,
// this optimization does not apply when we are reading from a snapshot because _shardVersions
// contains shards with chunks and is built based on the last refresh. Therefore, it is
diff --git a/src/mongo/s/chunk_manager.h b/src/mongo/s/chunk_manager.h
index b1dcbb3f815..079d1147d12 100644
--- a/src/mongo/s/chunk_manager.h
+++ b/src/mongo/s/chunk_manager.h
@@ -616,7 +616,7 @@ public:
/**
* Finds the shard IDs for a given filter and collation. If collation is empty, we use the
* collection default collation for targeting.
- * If output parameter `changeRanges` is non-null, the set is populated with ChunkRanges that
+ * If output parameter `chunkRanges` is non-null, the set is populated with ChunkRanges that
* would be targeted by the query; if nullptr, no processing of chunk ranges occurs.
*/
void getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> expCtx,
@@ -628,7 +628,7 @@ public:
/**
* Returns all shard ids which contain chunks overlapping the range [min, max]. Please note the
* inclusive bounds on both sides (SERVER-20768).
- * If output parameter `changeRanges` is non-null, the set is populated with ChunkRanges that
+ * If output parameter `chunkRanges` is non-null, the set is populated with ChunkRanges that
* would be targeted by the query.
*/
void getShardIdsForRange(const BSONObj& min,
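
The chunk_manager changes round out the picture: the header fixes correct `changeRanges` to the actual parameter name `chunkRanges`, and dropping the empty-set invariant in getShardIdsForRange lets the set be non-empty on entry, presumably so that getShardIdsForQuery, which may call getShardIdsForRange more than once for a single query, can accumulate ranges into one set. A minimal consumption sketch mirroring the new calculator (it assumes cm, expCtx, filter, collation, and a std::map<ChunkRange, int64_t> named numDispatchedByRange are in scope):

    // Illustrative sketch; the surrounding variables are assumptions.
    std::set<ShardId> shardIds;
    std::set<ChunkRange> chunkRanges;
    cm.getShardIdsForQuery(expCtx, filter, collation, &shardIds, &chunkRanges);
    for (const auto& range : chunkRanges) {
        // Each entry is a chunk range the query would be dispatched to.
        numDispatchedByRange[range]++;
    }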