author     Marcos José Grillo Ramirez <marcos.grillo@mongodb.com>   2023-04-17 14:43:54 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>         2023-04-19 19:12:24 +0000
commit     c7b8de3848fa6703599085541f9c7af6cf959288 (patch)
tree       dfef61839f993ff7cee9af3b2fbeb4408c76570c
parent     8856ae5c99f5db9acf73291f8d0085a6a3f214dd (diff)
SERVER-72146 Add extra migration statistics, including total bytes cloned before and during catch-up, committed and aborted migration counts, and total migration time
(cherry picked from commit 9fae6f63e917c33d6b4bee6d57446a3b6111b96a)
-rw-r--r--  src/mongo/db/s/migration_batch_inserter.cpp        1
-rw-r--r--  src/mongo/db/s/migration_destination_manager.cpp   8
-rw-r--r--  src/mongo/db/s/migration_util.cpp                  2
-rw-r--r--  src/mongo/db/s/sharding_statistics.cpp             8
-rw-r--r--  src/mongo/db/s/sharding_statistics.h              22
5 files changed, 39 insertions, 2 deletions
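
The patch applies one pattern throughout: each new statistic is a cumulative AtomicWord<long long> counter in ShardingStatistics, bumped with addAndFetch() at the relevant migration milestone and read with load() when the statistics report is built. As a rough, self-contained sketch of that shape (std::atomic<long long> standing in for mongo's AtomicWord, stream output standing in for BSONObjBuilder; illustrative only, not the server code):

#include <atomic>
#include <iostream>

// Process-wide cumulative counters, bumped from migration code paths
// and dumped into a report on demand.
struct MigrationStats {
    std::atomic<long long> docsClonedOnRecipient{0};
    std::atomic<long long> bytesClonedOnRecipient{0};
    std::atomic<long long> moveChunkCommitted{0};
    std::atomic<long long> moveChunkAborted{0};

    void report(std::ostream& out) const {
        out << "countDocsClonedOnRecipient: " << docsClonedOnRecipient.load() << '\n'
            << "countBytesClonedOnRecipient: " << bytesClonedOnRecipient.load() << '\n'
            << "countDonorMoveChunkCommitted: " << moveChunkCommitted.load() << '\n'
            << "countDonorMoveChunkAborted: " << moveChunkAborted.load() << '\n';
    }
};

int main() {
    MigrationStats stats;
    stats.docsClonedOnRecipient.fetch_add(128);           // a 128-doc clone batch lands
    stats.bytesClonedOnRecipient.fetch_add(128 * 1024);   // ...weighing 128 KiB
    stats.moveChunkCommitted.fetch_add(1);                // the donor commits
    stats.report(std::cout);
}
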
diff --git a/src/mongo/db/s/migration_batch_inserter.cpp b/src/mongo/db/s/migration_batch_inserter.cpp
index 30abce5c3a9..ac0a59126a0 100644
--- a/src/mongo/db/s/migration_batch_inserter.cpp
+++ b/src/mongo/db/s/migration_batch_inserter.cpp
@@ -163,6 +163,7 @@ void MigrationBatchInserter::run(Status status) const try {
repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp());
ShardingStatistics::get(opCtx).countDocsClonedOnRecipient.addAndFetch(batchNumCloned);
+ ShardingStatistics::get(opCtx).countBytesClonedOnRecipient.addAndFetch(batchClonedBytes);
LOGV2(6718408,
"Incrementing numCloned count by {batchNumCloned} and numClonedBytes by "
"{batchClonedBytes}",
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index 2f5230b1702..422d62346a0 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -1407,6 +1407,8 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx,
if (!_applyMigrateOp(opCtx, nextBatch)) {
return true;
}
+ ShardingStatistics::get(opCtx).countBytesClonedOnCatchUpOnRecipient.addAndFetch(
+ nextBatch["size"].number());
const int maxIterations = 3600 * 50;
@@ -1671,6 +1673,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx,
bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const BSONObj& xfer) {
bool didAnything = false;
long long changeInOrphans = 0;
+ long long totalDocs = 0;
// Deleted documents
if (xfer["deleted"].isABSONObj()) {
@@ -1681,6 +1684,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const
BSONObjIterator i(xfer["deleted"].Obj());
while (i.more()) {
+ totalDocs++;
AutoGetCollection autoColl(opCtx, _nss, MODE_IX);
uassert(ErrorCodes::ConflictingOperationInProgress,
str::stream() << "Collection " << _nss.ns()
@@ -1723,6 +1727,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const
if (xfer["reload"].isABSONObj()) {
BSONObjIterator i(xfer["reload"].Obj());
while (i.more()) {
+ totalDocs++;
AutoGetCollection autoColl(opCtx, _nss, MODE_IX);
uassert(ErrorCodes::ConflictingOperationInProgress,
str::stream() << "Collection " << _nss.ns()
@@ -1777,6 +1782,9 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const
migrationutil::persistUpdatedNumOrphans(
opCtx, _migrationId.get(), *_collectionUuid, changeInOrphans);
}
+
+ ShardingStatistics::get(opCtx).countDocsClonedOnCatchUpOnRecipient.addAndFetch(totalDocs);
+
return didAnything;
}
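
Note the shape of the counting in _applyMigrateOp: totalDocs is a plain local accumulated across both the "deleted" and "reload" loops, and the shared counter is touched exactly once per transfer batch via addAndFetch() at the end. A sketch of that accumulate-then-publish pattern (applyBatch is a hypothetical stand-in, not the real function):

#include <atomic>
#include <iostream>
#include <vector>

std::atomic<long long> docsClonedOnCatchUp{0};

// Count locally while iterating, then publish the sum with one atomic
// add: one shared-counter write per batch instead of per document.
void applyBatch(const std::vector<int>& deleted, const std::vector<int>& reloaded) {
    long long totalDocs = 0;
    for (int id : deleted) {
        (void)id;  // ...apply the deletion...
        ++totalDocs;
    }
    for (int id : reloaded) {
        (void)id;  // ...apply the upsert...
        ++totalDocs;
    }
    docsClonedOnCatchUp.fetch_add(totalDocs);
}

int main() {
    applyBatch({1, 2, 3}, {4, 5});
    std::cout << docsClonedOnCatchUp.load() << '\n';  // 5
}
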
diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp
index 2165c92e778..4484c4b4137 100644
--- a/src/mongo/db/s/migration_util.cpp
+++ b/src/mongo/db/s/migration_util.cpp
@@ -844,6 +844,7 @@ void persistCommitDecision(OperationContext* opCtx,
store.upsert(opCtx,
BSON(MigrationCoordinatorDocument::kIdFieldName << migrationDoc.getId()),
migrationDoc.toBSON());
+ ShardingStatistics::get(opCtx).countDonorMoveChunkCommitted.addAndFetch(1);
if (hangInPersistMigrateCommitDecisionThenSimulateErrorUninterruptible.shouldFail()) {
hangInPersistMigrateCommitDecisionThenSimulateErrorUninterruptible.pauseWhileSet(opCtx);
@@ -863,6 +864,7 @@ void persistAbortDecision(OperationContext* opCtx,
store.upsert(opCtx,
BSON(MigrationCoordinatorDocument::kIdFieldName << migrationDoc.getId()),
migrationDoc.toBSON());
+ ShardingStatistics::get(opCtx).countDonorMoveChunkAborted.addAndFetch(1);
if (hangInPersistMigrateAbortDecisionThenSimulateErrorUninterruptible.shouldFail()) {
hangInPersistMigrateAbortDecisionThenSimulateErrorUninterruptible.pauseWhileSet(opCtx);
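
Both decision paths bump their counter immediately after the coordinator document is upserted, so every migration that reaches a durable decision should increment exactly one of countDonorMoveChunkCommitted or countDonorMoveChunkAborted. A toy sketch of that exactly-one invariant (the Decision enum and persistDecision are hypothetical):

#include <atomic>
#include <iostream>

std::atomic<long long> committed{0};
std::atomic<long long> aborted{0};

enum class Decision { kCommit, kAbort };

// Exactly one counter is bumped per decided migration, after the
// decision has been persisted.
void persistDecision(Decision d) {
    // ...upsert the coordinator document here...
    (d == Decision::kCommit ? committed : aborted).fetch_add(1);
}

int main() {
    persistDecision(Decision::kCommit);
    persistDecision(Decision::kAbort);
    persistDecision(Decision::kCommit);
    std::cout << committed.load() + aborted.load() << '\n';  // 3 decisions total
}
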
diff --git a/src/mongo/db/s/sharding_statistics.cpp b/src/mongo/db/s/sharding_statistics.cpp
index ea17a500202..d103ad001fa 100644
--- a/src/mongo/db/s/sharding_statistics.cpp
+++ b/src/mongo/db/s/sharding_statistics.cpp
@@ -55,6 +55,9 @@ void ShardingStatistics::report(BSONObjBuilder* builder) const {
builder->append("countStaleConfigErrors", countStaleConfigErrors.load());
builder->append("countDonorMoveChunkStarted", countDonorMoveChunkStarted.load());
+ builder->append("countDonorMoveChunkCommitted", countDonorMoveChunkCommitted.load());
+ builder->append("countDonorMoveChunkAborted", countDonorMoveChunkAborted.load());
+ builder->append("totalDonorMoveChunkTimeMillis", totalDonorMoveChunkTimeMillis.load());
builder->append("totalDonorChunkCloneTimeMillis", totalDonorChunkCloneTimeMillis.load());
builder->append("totalCriticalSectionCommitTimeMillis",
totalCriticalSectionCommitTimeMillis.load());
@@ -62,6 +65,11 @@ void ShardingStatistics::report(BSONObjBuilder* builder) const {
builder->append("totalRecipientCriticalSectionTimeMillis",
totalRecipientCriticalSectionTimeMillis.load());
builder->append("countDocsClonedOnRecipient", countDocsClonedOnRecipient.load());
+ builder->append("countBytesClonedOnRecipient", countBytesClonedOnRecipient.load());
+ builder->append("countDocsClonedOnCatchUpOnRecipient",
+ countDocsClonedOnCatchUpOnRecipient.load());
+ builder->append("countBytesClonedOnCatchUpOnRecipient",
+ countBytesClonedOnCatchUpOnRecipient.load());
builder->append("countDocsClonedOnDonor", countDocsClonedOnDonor.load());
builder->append("countRecipientMoveChunkStarted", countRecipientMoveChunkStarted.load());
builder->append("countDocsDeletedOnDonor", countDocsDeletedOnDonor.load());
diff --git a/src/mongo/db/s/sharding_statistics.h b/src/mongo/db/s/sharding_statistics.h
index 91a10cc4308..7a7efeb1464 100644
--- a/src/mongo/db/s/sharding_statistics.h
+++ b/src/mongo/db/s/sharding_statistics.h
@@ -49,6 +49,12 @@ struct ShardingStatistics {
// (whether they succeeded or not).
AtomicWord<long long> countDonorMoveChunkStarted{0};
+ // Cumulative, always-increasing counter of how many moveChunk operations this node committed.
+ AtomicWord<long long> countDonorMoveChunkCommitted{0};
+
+ // Cumulative, always-increasing counter of how many moveChunk operations this node aborted.
+ AtomicWord<long long> countDonorMoveChunkAborted{0};
+
// Cumulative, always-increasing counter of how much time the entire move chunk operation took
// (excluding range deletion).
AtomicWord<long long> totalDonorMoveChunkTimeMillis{0};
@@ -61,12 +67,24 @@ struct ShardingStatistics {
// recipient node.
AtomicWord<long long> countDocsClonedOnRecipient{0};
+ // Cumulative, always-increasing counter of how many documents have been cloned during the
+ // catch-up phase on the recipient node.
+ AtomicWord<long long> countDocsClonedOnCatchUpOnRecipient{0};
+
+ // Cumulative, always-increasing counter of how many bytes have been cloned during the
+ // catch-up phase on the recipient node.
+ AtomicWord<long long> countBytesClonedOnCatchUpOnRecipient{0};
+
+ // Cumulative, always-increasing counter of how many bytes have been cloned on the
+ // recipient node.
+ AtomicWord<long long> countBytesClonedOnRecipient{0};
+
// Cumulative, always-increasing counter of how many documents have been cloned on the donor
// node.
AtomicWord<long long> countDocsClonedOnDonor{0};
- // Cumulative, always-increasing counter of how many documents have been deleted on the donor
- // node by the rangeDeleter.
+ // Cumulative, always-increasing counter of how many documents have been deleted by the
+ // rangeDeleter.
AtomicWord<long long> countDocsDeletedOnDonor{0};
// Cumulative, always-increasing counter of how many chunks this node started to receive