summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRandolph Tan <randolph@10gen.com>2021-06-07 20:24:17 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-06-16 16:32:29 +0000
commit5c00024e3cf4a27039117e000e475c6ee797c700 (patch)
treefed602657828a36fae67ac13879b16181c977b04
parent796a9f1504ea088bca412b8af787b735943245cf (diff)
downloadmongo-5c00024e3cf4a27039117e000e475c6ee797c700.tar.gz
SERVER-55430 Record metrics about whether a collection is rebalanced after resharding op finishes
-rw-r--r--jstests/sharding/resharding_metrics.js52
-rw-r--r--src/mongo/db/s/SConscript2
-rw-r--r--src/mongo/db/s/balancer/balance_stats.cpp84
-rw-r--r--src/mongo/db/s/balancer/balance_stats.h50
-rw-r--r--src/mongo/db/s/balancer/balance_stats_test.cpp188
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service.cpp43
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service.h5
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp6
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics.cpp13
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics.h3
10 files changed, 428 insertions, 18 deletions
diff --git a/jstests/sharding/resharding_metrics.js b/jstests/sharding/resharding_metrics.js
index f15644dc355..f051634ef7c 100644
--- a/jstests/sharding/resharding_metrics.js
+++ b/jstests/sharding/resharding_metrics.js
@@ -29,28 +29,37 @@ function verifyMetrics(metrics, expected) {
}
}
-function verifyServerStatusOutput(reshardingTest, inputCollection, expectedMetrics) {
- function testMetricsArePresent(mongo) {
- const stats = mongo.getDB('admin').serverStatus({});
- assert(stats.hasOwnProperty('shardingStatistics'), stats);
- const shardingStats = stats.shardingStatistics;
- assert(shardingStats.hasOwnProperty('resharding'),
- `Missing resharding section in ${tojson(shardingStats)}`);
-
- const metrics = shardingStats.resharding;
- verifyMetrics(metrics, expectedMetrics);
- }
+function testMetricsArePresent(mongo, expectedMetrics) {
+ const stats = mongo.getDB('admin').serverStatus({});
+ assert(stats.hasOwnProperty('shardingStatistics'), stats);
+ const shardingStats = stats.shardingStatistics;
+ assert(shardingStats.hasOwnProperty('resharding'),
+ `Missing resharding section in ${tojson(shardingStats)}`);
+
+ const metrics = shardingStats.resharding;
+ verifyMetrics(metrics, expectedMetrics);
+}
+function verifyParticipantServerStatusOutput(reshardingTest, inputCollection, expectedMetrics) {
const donorShardNames = reshardingTest.donorShardNames;
const recipientShardNames = reshardingTest.recipientShardNames;
const mongos = inputCollection.getMongo();
const topology = DiscoverTopology.findConnectedNodes(mongos);
- testMetricsArePresent(new Mongo(topology.shards[donorShardNames[0]].primary));
- testMetricsArePresent(new Mongo(topology.shards[donorShardNames[1]].primary));
- testMetricsArePresent(new Mongo(topology.shards[recipientShardNames[0]].primary));
- testMetricsArePresent(new Mongo(topology.shards[recipientShardNames[1]].primary));
+ testMetricsArePresent(new Mongo(topology.shards[donorShardNames[0]].primary), expectedMetrics);
+ testMetricsArePresent(new Mongo(topology.shards[donorShardNames[1]].primary), expectedMetrics);
+ testMetricsArePresent(new Mongo(topology.shards[recipientShardNames[0]].primary),
+ expectedMetrics);
+ testMetricsArePresent(new Mongo(topology.shards[recipientShardNames[1]].primary),
+ expectedMetrics);
+}
+
+function verifyCoordinatorServerStatusOutput(inputCollection, expectedMetrics) {
+ const mongos = inputCollection.getMongo();
+ const topology = DiscoverTopology.findConnectedNodes(mongos);
+
+ testMetricsArePresent(new Mongo(topology.configsvr.primary), expectedMetrics);
}
// Tests the currentOp output for each donor, each recipient, and the coordinator.
@@ -155,9 +164,11 @@ var initialServerStatusMetrics = {
"bytesCopied": 0,
"oplogEntriesApplied": 0,
"countWritesDuringCriticalSection": 0,
+ "lastOpEndingChunkImbalance": 0,
};
-verifyServerStatusOutput(reshardingTest, inputCollection, initialServerStatusMetrics);
+verifyParticipantServerStatusOutput(reshardingTest, inputCollection, initialServerStatusMetrics);
+verifyCoordinatorServerStatusOutput(inputCollection, {lastOpEndingChunkImbalance: 0});
var documentsInserted = [
{_id: "stays on shard0", oldKey: -10, newKey: -10},
@@ -174,7 +185,10 @@ reshardingTest.withReshardingInBackground( //
newShardKeyPattern: {newKey: 1},
newChunks: [
{min: {newKey: MinKey}, max: {newKey: 0}, shard: recipientShardNames[0]},
- {min: {newKey: 0}, max: {newKey: MaxKey}, shard: recipientShardNames[1]},
+ {min: {newKey: 0}, max: {newKey: 10}, shard: recipientShardNames[1]},
+ {min: {newKey: 10}, max: {newKey: 20}, shard: recipientShardNames[1]},
+ {min: {newKey: 20}, max: {newKey: 30}, shard: recipientShardNames[1]},
+ {min: {newKey: 30}, max: {newKey: MaxKey}, shard: recipientShardNames[1]},
],
},
(tempNs) => {
@@ -190,9 +204,11 @@ var finalServerStatusMetrics = {
"bytesCopied": Object.bsonsize(documentsInserted[1]) + Object.bsonsize(documentsInserted[2]),
"oplogEntriesApplied": 2,
"countWritesDuringCriticalSection": 0,
+ "lastOpEndingChunkImbalance": 0,
};
-verifyServerStatusOutput(reshardingTest, inputCollection, finalServerStatusMetrics);
+verifyParticipantServerStatusOutput(reshardingTest, inputCollection, finalServerStatusMetrics);
+verifyCoordinatorServerStatusOutput(inputCollection, {lastOpEndingChunkImbalance: 3});
reshardingTest.teardown();
})();
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index d37d5f8df85..8675525b656 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -227,6 +227,7 @@ env.Library(
source=[
'add_shard_cmd.idl',
'add_shard_util.cpp',
+ 'balancer/balance_stats.cpp',
'balancer/balancer_chunk_selection_policy_impl.cpp',
'balancer/balancer_chunk_selection_policy.cpp',
'balancer/balancer_policy.cpp',
@@ -458,6 +459,7 @@ env.CppUnitTest(
'active_migrations_registry_test.cpp',
'active_move_primaries_registry_test.cpp',
'active_shard_collection_registry_test.cpp',
+ 'balancer/balance_stats_test.cpp',
'chunk_split_state_driver_test.cpp',
'collection_metadata_filtering_test.cpp',
'collection_metadata_test.cpp',
diff --git a/src/mongo/db/s/balancer/balance_stats.cpp b/src/mongo/db/s/balancer/balance_stats.cpp
new file mode 100644
index 00000000000..0ffaff0d0e8
--- /dev/null
+++ b/src/mongo/db/s/balancer/balance_stats.cpp
@@ -0,0 +1,84 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/s/balancer/balance_stats.h"
+
+#include "mongo/db/s/balancer/balancer_policy.h"
+#include "mongo/logv2/log.h"
+#include "mongo/s/chunk_manager.h"
+#include "mongo/stdx/unordered_map.h"
+
+namespace mongo {
+
+int64_t getMaxChunkImbalanceCount(const ChunkManager& routingInfo,
+ const std::vector<ShardType>& allShards,
+ const ZoneInfo& zoneInfo) {
+ // Map of { zone -> { shardId -> chunkCount }}
+ StringMap<stdx::unordered_map<ShardId, int64_t>> chunkDistributionPerZone;
+ int64_t zoneMaxChunkImbalance = 0;
+
+ for (const auto& shard : allShards) {
+ chunkDistributionPerZone[""][shard.getName()] = 0;
+
+ for (const auto& tag : shard.getTags()) {
+ chunkDistributionPerZone[tag][shard.getName()] = 0;
+ }
+ }
+
+ routingInfo.forEachChunk([&zoneInfo, &chunkDistributionPerZone](auto chunk) {
+ auto zone = zoneInfo.getZoneForChunk(chunk.getRange());
+ chunkDistributionPerZone[zone][chunk.getShardId()] += 1;
+ return true;
+ });
+
+ for (auto&& zoneShardPair : chunkDistributionPerZone) {
+ std::set<int64_t> chunkCountInShards;
+
+ for (auto&& shardCountPair : zoneShardPair.second) {
+ chunkCountInShards.insert(shardCountPair.second);
+ }
+
+ int64_t imbalance = 0;
+ if (!chunkCountInShards.empty()) {
+ imbalance = *chunkCountInShards.rbegin() - *chunkCountInShards.begin();
+ }
+
+ if (imbalance > zoneMaxChunkImbalance) {
+ zoneMaxChunkImbalance = imbalance;
+ }
+ }
+
+ return zoneMaxChunkImbalance;
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/s/balancer/balance_stats.h b/src/mongo/db/s/balancer/balance_stats.h
new file mode 100644
index 00000000000..0a36a65ddc5
--- /dev/null
+++ b/src/mongo/db/s/balancer/balance_stats.h
@@ -0,0 +1,50 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+#include "mongo/s/catalog/type_shard.h"
+
+namespace mongo {
+
+class ChunkManager;
+class ZoneInfo;
+
+/**
+ * Returns the maximum chunk imbalance (most chunks in shard minus least chunks in shard) among each
+ * zone. The default unlabeled zone is considered as its own zone.
+ */
+int64_t getMaxChunkImbalanceCount(const ChunkManager& routingInfo,
+ const std::vector<ShardType>& allShards,
+ const ZoneInfo& zoneInfo);
+
+} // namespace mongo
diff --git a/src/mongo/db/s/balancer/balance_stats_test.cpp b/src/mongo/db/s/balancer/balance_stats_test.cpp
new file mode 100644
index 00000000000..3c825b8f317
--- /dev/null
+++ b/src/mongo/db/s/balancer/balance_stats_test.cpp
@@ -0,0 +1,188 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/bson/oid.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/s/balancer/balance_stats.h"
+#include "mongo/db/s/balancer/balancer_policy.h"
+#include "mongo/s/chunk_manager.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+namespace {
+
+class BalanceStatsTest : public mongo::unittest::Test {
+public:
+ ChunkType makeChunk(const BSONObj& minKey, const BSONObj& maxKey, const ShardId& shard) {
+ _nextVersion.incMinor();
+ return ChunkType(_nss, ChunkRange(minKey, maxKey), _nextVersion, shard);
+ }
+
+ ShardType makeShard(const std::string& name, std::vector<std::string> tags = {}) {
+ return ShardType(name, name, tags);
+ }
+
+ ChunkManager makeRoutingInfo(const KeyPattern& shardKeyPattern,
+ const std::vector<ChunkType>& chunks) {
+ auto routingTableHistory = RoutingTableHistory::makeNew(_nss,
+ boost::none, // UUID
+ shardKeyPattern,
+ {}, // collator
+ false, // unique
+ _epoch,
+ boost::none, // timestamp
+ boost::none, // time series fields
+ boost::none, // resharding fields
+ true, // allowMigration
+ chunks);
+
+ return ChunkManager(_shardPrimary,
+ _dbVersion,
+ RoutingTableHistoryValueHandle(std::move(routingTableHistory)),
+ boost::none);
+ }
+
+private:
+ const NamespaceString _nss{"foo.bar"};
+ const OID _epoch{OID::gen()};
+ const ShardId _shardPrimary{"dummyShardPrimary"};
+ const DatabaseVersion _dbVersion{UUID::gen()};
+ ChunkVersion _nextVersion{1, 0, _epoch, boost::none};
+};
+
+TEST_F(BalanceStatsTest, SingleChunkNoZones) {
+ std::vector<ShardType> shards;
+ shards.push_back(makeShard("a"));
+
+ std::vector<ChunkType> chunks;
+ chunks.push_back(makeChunk(BSON("x" << MINKEY), BSON("x" << MAXKEY), ShardId("a")));
+
+ auto routingInfo = makeRoutingInfo(KeyPattern(BSON("x" << 1)), chunks);
+ auto imbalanceCount = getMaxChunkImbalanceCount(routingInfo, shards, {});
+ ASSERT_EQ(0, imbalanceCount);
+}
+
+TEST_F(BalanceStatsTest, SingleShardHasMoreChunksNoZones) {
+ std::vector<ShardType> shards;
+ shards.push_back(makeShard("a"));
+ shards.push_back(makeShard("b"));
+
+ std::vector<ChunkType> chunks;
+ chunks.push_back(makeChunk(BSON("x" << MINKEY), BSON("x" << 10), ShardId("a")));
+ chunks.push_back(makeChunk(BSON("x" << 10), BSON("x" << 20), ShardId("b")));
+ chunks.push_back(makeChunk(BSON("x" << 20), BSON("x" << 30), ShardId("b")));
+ chunks.push_back(makeChunk(BSON("x" << 30), BSON("x" << MAXKEY), ShardId("b")));
+
+ auto routingInfo = makeRoutingInfo(KeyPattern(BSON("x" << 1)), chunks);
+ auto imbalanceCount = getMaxChunkImbalanceCount(routingInfo, shards, {});
+ ASSERT_EQ(2, imbalanceCount);
+}
+
+TEST_F(BalanceStatsTest, BalancedChunkInShardButNotZones) {
+ std::vector<ShardType> shards;
+ shards.push_back(makeShard("a", {"zone1"}));
+ shards.push_back(makeShard("b", {"zone1"}));
+
+ std::vector<ChunkType> chunks;
+ chunks.push_back(makeChunk(BSON("x" << MINKEY), BSON("x" << 10), ShardId("a")));
+ chunks.push_back(makeChunk(BSON("x" << 10), BSON("x" << 20), ShardId("a")));
+ chunks.push_back(makeChunk(BSON("x" << 20), BSON("x" << 30), ShardId("b")));
+ chunks.push_back(makeChunk(BSON("x" << 30), BSON("x" << 40), ShardId("b")));
+ chunks.push_back(makeChunk(BSON("x" << 40), BSON("x" << MAXKEY), ShardId("b")));
+
+ ZoneInfo zoneInfo;
+ uassertStatusOK(zoneInfo.addRangeToZone({BSON("x" << 10), BSON("x" << MAXKEY), "zone1"}));
+
+ auto routingInfo = makeRoutingInfo(KeyPattern(BSON("x" << 1)), chunks);
+ auto imbalanceCount = getMaxChunkImbalanceCount(routingInfo, shards, zoneInfo);
+
+ // Chunk counts:
+ // Default Zone: a: 1, b: 0
+ // zone1: a: 1, b: 3
+
+ ASSERT_EQ(2, imbalanceCount);
+}
+
+TEST_F(BalanceStatsTest, BalancedChunkInZonesButNotShards) {
+ std::vector<ShardType> shards;
+ shards.push_back(makeShard("a", {"zone1"}));
+ shards.push_back(makeShard("b", {"zone2"}));
+ shards.push_back(makeShard("c", {"zone2"}));
+
+ std::vector<ChunkType> chunks;
+ chunks.push_back(makeChunk(BSON("x" << MINKEY), BSON("x" << 10), ShardId("a")));
+ chunks.push_back(makeChunk(BSON("x" << 10), BSON("x" << 20), ShardId("a")));
+ chunks.push_back(makeChunk(BSON("x" << 20), BSON("x" << 30), ShardId("a")));
+ chunks.push_back(makeChunk(BSON("x" << 30), BSON("x" << 40), ShardId("a")));
+ chunks.push_back(makeChunk(BSON("x" << 40), BSON("x" << 50), ShardId("b")));
+ chunks.push_back(makeChunk(BSON("x" << 50), BSON("x" << MAXKEY), ShardId("c")));
+
+ ZoneInfo zoneInfo;
+ uassertStatusOK(zoneInfo.addRangeToZone({BSON("x" << MINKEY), BSON("x" << 40), "zone1"}));
+ uassertStatusOK(zoneInfo.addRangeToZone({BSON("x" << 40), BSON("x" << MAXKEY), "zone2"}));
+
+ auto routingInfo = makeRoutingInfo(KeyPattern(BSON("x" << 1)), chunks);
+ auto imbalanceCount = getMaxChunkImbalanceCount(routingInfo, shards, zoneInfo);
+
+ // Chunk counts:
+ // zone1: a: 4
+ // zone2: b:1, c: 1
+
+ ASSERT_EQ(0, imbalanceCount);
+}
+
+TEST_F(BalanceStatsTest, NoChunkInAShard) {
+ std::vector<ShardType> shards;
+ shards.push_back(makeShard("a", {"zone1"}));
+ shards.push_back(makeShard("b", {"zone1"}));
+ shards.push_back(makeShard("c"));
+
+ std::vector<ChunkType> chunks;
+ chunks.push_back(makeChunk(BSON("x" << MINKEY), BSON("x" << 10), ShardId("a")));
+ chunks.push_back(makeChunk(BSON("x" << 10), BSON("x" << 20), ShardId("b")));
+ chunks.push_back(makeChunk(BSON("x" << 20), BSON("x" << 30), ShardId("a")));
+ chunks.push_back(makeChunk(BSON("x" << 30), BSON("x" << MAXKEY), ShardId("b")));
+
+ ZoneInfo zoneInfo;
+ uassertStatusOK(zoneInfo.addRangeToZone({BSON("x" << MINKEY), BSON("x" << 20), "zone1"}));
+
+ auto routingInfo = makeRoutingInfo(KeyPattern(BSON("x" << 1)), chunks);
+ auto imbalanceCount = getMaxChunkImbalanceCount(routingInfo, shards, zoneInfo);
+
+ // Chunk counts:
+ // default zone: a: 1, b: 1, c: 0
+ // zone1: a: 1, b: 1
+
+ ASSERT_EQ(1, imbalanceCount);
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
index 93d573c8f75..b8c6bafd65f 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
@@ -38,6 +38,8 @@
#include "mongo/db/logical_session_cache.h"
#include "mongo/db/ops/write_ops.h"
#include "mongo/db/repl/primary_only_service.h"
+#include "mongo/db/s/balancer/balance_stats.h"
+#include "mongo/db/s/balancer/balancer_policy.h"
#include "mongo/db/s/config/initial_split_policy.h"
#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/s/resharding/resharding_coordinator_commit_monitor.h"
@@ -1134,6 +1136,7 @@ ReshardingCoordinatorService::ReshardingCoordinator::_persistDecisionAndFinishRe
.then([this, self = shared_from_this(), executor] {
_tellAllParticipantsToRefresh(_coordinatorDoc.getSourceNss(), executor);
})
+ .then([this] { _updateChunkImbalanceMetrics(_coordinatorDoc.getSourceNss()); })
.then([this, self = shared_from_this(), executor] {
// The shared_ptr maintaining the ReshardingCoordinatorService Instance object gets
// deleted from the PrimaryOnlyService's map. Thus, shared_from_this() is necessary to
@@ -1660,4 +1663,44 @@ void ReshardingCoordinatorService::ReshardingCoordinator::_tellAllParticipantsTo
**executor);
}
+void ReshardingCoordinatorService::ReshardingCoordinator::_updateChunkImbalanceMetrics(
+ const NamespaceString& nss) {
+ auto cancellableOpCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
+ auto opCtx = cancellableOpCtx.get();
+
+ try {
+ auto routingInfo = uassertStatusOK(
+ Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx,
+ nss));
+
+ const auto collectionZones =
+ uassertStatusOK(Grid::get(opCtx)->catalogClient()->getTagsForCollection(opCtx, nss));
+
+ const auto& keyPattern = routingInfo.getShardKeyPattern().getKeyPattern();
+
+ ZoneInfo zoneInfo;
+ for (const auto& tag : collectionZones) {
+ uassertStatusOK(zoneInfo.addRangeToZone(
+ ZoneRange(keyPattern.extendRangeBound(tag.getMinKey(), false),
+ keyPattern.extendRangeBound(tag.getMaxKey(), false),
+ tag.getTag())));
+ }
+
+ const auto allShardsWithOpTime =
+ uassertStatusOK(Grid::get(opCtx)->catalogClient()->getAllShards(
+ opCtx, repl::ReadConcernLevel::kLocalReadConcern));
+
+ auto imbalanceCount =
+ getMaxChunkImbalanceCount(routingInfo, allShardsWithOpTime.value, zoneInfo);
+
+ ReshardingMetrics::get(opCtx->getServiceContext())
+ ->setLastReshardChunkImbalanceCount(imbalanceCount);
+ } catch (const DBException& ex) {
+ LOGV2_WARNING(5543000,
+ "Encountered error while trying to update resharding chunk imbalance metrics",
+ "namespace"_attr = nss,
+ "error"_attr = redact(ex.toStatus()));
+ }
+}
+
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.h b/src/mongo/db/s/resharding/resharding_coordinator_service.h
index 454e6fd5c49..b0a6a3a40c1 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.h
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.h
@@ -424,6 +424,11 @@ private:
void _tellAllParticipantsToAbort(const std::shared_ptr<executor::ScopedTaskExecutor>& executor,
bool isUserAborted);
+ /**
+ * Best effort attempt to update the chunk imbalance metrics.
+ */
+ void _updateChunkImbalanceMetrics(const NamespaceString& nss);
+
// The unique key for a given resharding operation. InstanceID is an alias for BSONObj. The
// value of this is the UUID that will be used as the collection UUID for the new sharded
// collection. The object looks like: {_id: 'reshardingUUID'}
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp
index 785ddd3305d..f4f61a1a413 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp
@@ -411,6 +411,12 @@ public:
opCtx->getServiceContext()->getPreciseClockSource()->now());
client.insert(CollectionType::ConfigNS.ns(), originalNssCatalogEntry.toBSON());
+ DatabaseType dbDoc(coordinatorDoc.getSourceNss().db().toString(),
+ coordinatorDoc.getDonorShards().front().getId(),
+ true,
+ DatabaseVersion{UUID::gen()});
+ client.insert(DatabaseType::ConfigNS.ns(), dbDoc.toBSON());
+
return coordinatorDoc;
}
diff --git a/src/mongo/db/s/resharding/resharding_metrics.cpp b/src/mongo/db/s/resharding/resharding_metrics.cpp
index c7cb0747b70..71c8542268d 100644
--- a/src/mongo/db/s/resharding/resharding_metrics.cpp
+++ b/src/mongo/db/s/resharding/resharding_metrics.cpp
@@ -61,6 +61,7 @@ constexpr auto kCoordinatorState = "coordinatorState";
constexpr auto kDonorState = "donorState";
constexpr auto kRecipientState = "recipientState";
constexpr auto kOpStatus = "opStatus";
+constexpr auto kLastOpEndingChunkImbalance = "lastOpEndingChunkImbalance";
using MetricsPtr = std::unique_ptr<ReshardingMetrics>;
@@ -154,6 +155,8 @@ public:
TimeInterval inCriticalSection;
int64_t writesDuringCriticalSection = 0;
+ int64_t chunkImbalanceCount = 0;
+
boost::optional<DonorStateEnum> donorState;
boost::optional<RecipientStateEnum> recipientState;
boost::optional<CoordinatorStateEnum> coordinatorState;
@@ -402,6 +405,15 @@ void ReshardingMetrics::setDocumentsToCopyForCurrentOp(int64_t documents, int64_
_currentOp->bytesToCopy = bytes;
}
+void ReshardingMetrics::setLastReshardChunkImbalanceCount(int64_t newCount) noexcept {
+ stdx::lock_guard<Latch> lk(_mutex);
+
+ invariant(_currentOp, kNoOperationInProgress);
+ invariant(_currentOp->coordinatorState);
+
+ _cumulativeOp->chunkImbalanceCount = newCount;
+}
+
void ReshardingMetrics::onDocumentsCopied(int64_t documents, int64_t bytes) noexcept {
stdx::lock_guard<Latch> lk(_mutex);
if (!_currentOp)
@@ -577,6 +589,7 @@ void ReshardingMetrics::serializeCumulativeOpMetrics(BSONObjBuilder* bob) const
bob->append(kOplogsApplied, ops.oplogEntriesApplied);
bob->append(kWritesDuringCritialSection, ops.writesDuringCriticalSection);
bob->append(kOplogsFetched, ops.oplogEntriesFetched);
+ bob->append(kLastOpEndingChunkImbalance, ops.chunkImbalanceCount);
}
Date_t ReshardingMetrics::_now() const {
diff --git a/src/mongo/db/s/resharding/resharding_metrics.h b/src/mongo/db/s/resharding/resharding_metrics.h
index 134ccae02c7..3dd6167a679 100644
--- a/src/mongo/db/s/resharding/resharding_metrics.h
+++ b/src/mongo/db/s/resharding/resharding_metrics.h
@@ -117,6 +117,9 @@ public:
ReshardingOperationStatusEnum status,
Date_t runningOperationEndTime) noexcept;
+ // Records the chunk imbalance count for the most recent resharding operation.
+ void setLastReshardChunkImbalanceCount(int64_t newCount) noexcept;
+
struct ReporterOptions {
ReporterOptions(Role role, UUID id, NamespaceString nss, BSONObj shardKey, bool unique)
: role(role),