author     Marcos José Grillo Ramirez <marcos.grillo@mongodb.com>  2023-04-17 14:43:54 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>        2023-04-19 20:00:07 +0000
commit     98fc8a3bb804407cf61b4ee82b0c3bd55a16b950 (patch)
tree       ae4523153c98197fa7a46cc374a402c1e39aa0cc
parent     300be016afdf9be6637cb00cdffb104aa3330f68 (diff)
download   mongo-98fc8a3bb804407cf61b4ee82b0c3bd55a16b950.tar.gz
SERVER-72146 Add extra migration statistics including the total bytes cloned before and during catch up, committed and aborted migrations and total time
(cherry picked from commit 9fae6f63e917c33d6b4bee6d57446a3b6111b96a)
(cherry picked from commit c7b8de3848fa6703599085541f9c7af6cf959288)
-rw-r--r--  src/mongo/db/s/migration_batch_inserter.cpp         1
-rw-r--r--  src/mongo/db/s/migration_destination_manager.cpp    7
-rw-r--r--  src/mongo/db/s/migration_util.cpp                   2
-rw-r--r--  src/mongo/db/s/sharding_statistics.cpp              8
-rw-r--r--  src/mongo/db/s/sharding_statistics.h               22
5 files changed, 38 insertions, 2 deletions
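The pattern behind every metric added here is the same: ShardingStatistics keeps one cumulative AtomicWord<long long> per statistic, the migration hot paths bump it with addAndFetch(), and report() snapshots each value with load() into the BSON document used for the shardingStatistics section of serverStatus. Below is a minimal, standalone sketch of that shape using only the standard library (std::atomic in place of AtomicWord, a std::map in place of a BSONObjBuilder); the names and helpers are illustrative, not MongoDB APIs.

#include <atomic>
#include <iostream>
#include <map>
#include <string>

struct MigrationStats {
    // Cumulative, always-increasing counters (stand-ins for AtomicWord<long long>).
    std::atomic<long long> countDonorMoveChunkCommitted{0};
    std::atomic<long long> countDonorMoveChunkAborted{0};
    std::atomic<long long> countBytesClonedOnRecipient{0};
    std::atomic<long long> countBytesClonedOnCatchUpOnRecipient{0};

    // Hot path: bump a counter without taking any lock.
    void onBatchCloned(long long bytes) {
        countBytesClonedOnRecipient.fetch_add(bytes);
    }

    // Reporting path: snapshot every counter into the output document.
    void report(std::map<std::string, long long>* out) const {
        (*out)["countDonorMoveChunkCommitted"] = countDonorMoveChunkCommitted.load();
        (*out)["countDonorMoveChunkAborted"] = countDonorMoveChunkAborted.load();
        (*out)["countBytesClonedOnRecipient"] = countBytesClonedOnRecipient.load();
        (*out)["countBytesClonedOnCatchUpOnRecipient"] =
            countBytesClonedOnCatchUpOnRecipient.load();
    }
};

int main() {
    MigrationStats stats;
    stats.onBatchCloned(4096);                        // e.g. one cloned batch of 4 KiB
    stats.countDonorMoveChunkCommitted.fetch_add(1);  // e.g. one committed moveChunk

    std::map<std::string, long long> doc;
    stats.report(&doc);
    for (const auto& [name, value] : doc)
        std::cout << name << ": " << value << '\n';
}

Because the counters only ever increase and each update is a single atomic add, the hot path stays lock-free and load() gives a snapshot that is good enough for monitoring.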
diff --git a/src/mongo/db/s/migration_batch_inserter.cpp b/src/mongo/db/s/migration_batch_inserter.cpp
index d1df6c1a1b8..7197538d9c2 100644
--- a/src/mongo/db/s/migration_batch_inserter.cpp
+++ b/src/mongo/db/s/migration_batch_inserter.cpp
@@ -160,6 +160,7 @@ void MigrationBatchInserter::run(Status status) const try {
repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp());
ShardingStatistics::get(opCtx).countDocsClonedOnRecipient.addAndFetch(batchNumCloned);
+ ShardingStatistics::get(opCtx).countBytesClonedOnRecipient.addAndFetch(batchClonedBytes);
LOGV2(6718408,
"Incrementing numCloned count by {batchNumCloned} and numClonedBytes by "
"{batchClonedBytes}",
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index 266f707dd55..8558a33c3fd 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -1172,6 +1172,8 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) {
if (!_applyMigrateOp(opCtx, mods, &lastOpApplied)) {
continue;
}
+ ShardingStatistics::get(opCtx).countBytesClonedOnCatchUpOnRecipient.addAndFetch(
+ mods["size"].number());
const int maxIterations = 3600 * 50;
@@ -1326,6 +1328,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx,
invariant(lastOpApplied);
bool didAnything = false;
+ long long totalDocs = 0;
DisableDocumentValidation documentValidationDisabler(
opCtx,
@@ -1341,6 +1344,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx,
BSONObjIterator i(xfer["deleted"].Obj());
while (i.more()) {
+ totalDocs++;
AutoGetCollection autoColl(opCtx, _nss, MODE_IX);
uassert(ErrorCodes::ConflictingOperationInProgress,
str::stream() << "Collection " << _nss.ns()
@@ -1383,6 +1387,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx,
if (xfer["reload"].isABSONObj()) {
BSONObjIterator i(xfer["reload"].Obj());
while (i.more()) {
+ totalDocs++;
AutoGetCollection autoColl(opCtx, _nss, MODE_IX);
uassert(ErrorCodes::ConflictingOperationInProgress,
str::stream() << "Collection " << _nss.ns()
@@ -1431,6 +1436,8 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx,
}
}
+ ShardingStatistics::get(opCtx).countDocsClonedOnCatchUpOnRecipient.addAndFetch(totalDocs);
+
return didAnything;
}
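A detail worth noting in the _applyMigrateOp change above: totalDocs is a plain local that both the "deleted" and "reload" loops increment, and the shared counter is touched only once, via a single addAndFetch after both loops finish, so no atomic operation is issued per document. A short sketch of that accumulate-locally-then-publish-once pattern (standalone std::atomic and a hypothetical helper, not the MongoDB code):

#include <atomic>
#include <vector>

std::atomic<long long> countDocsClonedOnCatchUp{0};  // shared, process-wide counter

// Hypothetical helper: apply one catch-up batch and publish the doc count once.
void applyCatchUpBatch(const std::vector<int>& deletedDocs,
                       const std::vector<int>& reloadDocs) {
    long long totalDocs = 0;  // local accumulator; no atomic traffic per document
    for (int doc : deletedDocs) {
        (void)doc;            // ... delete the document ...
        ++totalDocs;
    }
    for (int doc : reloadDocs) {
        (void)doc;            // ... upsert the document ...
        ++totalDocs;
    }
    countDocsClonedOnCatchUp.fetch_add(totalDocs);  // one atomic add per batch
}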
diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp
index 531688d5a6b..40e721c42a6 100644
--- a/src/mongo/db/s/migration_util.cpp
+++ b/src/mongo/db/s/migration_util.cpp
@@ -674,6 +674,7 @@ void persistCommitDecision(OperationContext* opCtx,
store.upsert(opCtx,
QUERY(MigrationCoordinatorDocument::kIdFieldName << migrationDoc.getId()),
migrationDoc.toBSON());
+ ShardingStatistics::get(opCtx).countDonorMoveChunkCommitted.addAndFetch(1);
if (hangInPersistMigrateCommitDecisionThenSimulateErrorUninterruptible.shouldFail()) {
hangInPersistMigrateCommitDecisionThenSimulateErrorUninterruptible.pauseWhileSet(opCtx);
@@ -693,6 +694,7 @@ void persistAbortDecision(OperationContext* opCtx,
store.upsert(opCtx,
QUERY(MigrationCoordinatorDocument::kIdFieldName << migrationDoc.getId()),
migrationDoc.toBSON());
+ ShardingStatistics::get(opCtx).countDonorMoveChunkAborted.addAndFetch(1);
if (hangInPersistMigrateAbortDecisionThenSimulateErrorUninterruptible.shouldFail()) {
hangInPersistMigrateAbortDecisionThenSimulateErrorUninterruptible.pauseWhileSet(opCtx);
diff --git a/src/mongo/db/s/sharding_statistics.cpp b/src/mongo/db/s/sharding_statistics.cpp
index fd201333860..4c88d479e77 100644
--- a/src/mongo/db/s/sharding_statistics.cpp
+++ b/src/mongo/db/s/sharding_statistics.cpp
@@ -55,11 +55,19 @@ void ShardingStatistics::report(BSONObjBuilder* builder) const {
builder->append("countStaleConfigErrors", countStaleConfigErrors.load());
builder->append("countDonorMoveChunkStarted", countDonorMoveChunkStarted.load());
+ builder->append("countDonorMoveChunkCommitted", countDonorMoveChunkCommitted.load());
+ builder->append("countDonorMoveChunkAborted", countDonorMoveChunkAborted.load());
+ builder->append("totalDonorMoveChunkTimeMillis", totalDonorMoveChunkTimeMillis.load());
builder->append("totalDonorChunkCloneTimeMillis", totalDonorChunkCloneTimeMillis.load());
builder->append("totalCriticalSectionCommitTimeMillis",
totalCriticalSectionCommitTimeMillis.load());
builder->append("totalCriticalSectionTimeMillis", totalCriticalSectionTimeMillis.load());
builder->append("countDocsClonedOnRecipient", countDocsClonedOnRecipient.load());
+ builder->append("countBytesClonedOnRecipient", countBytesClonedOnRecipient.load());
+ builder->append("countDocsClonedOnCatchUpOnRecipient",
+ countDocsClonedOnCatchUpOnRecipient.load());
+ builder->append("countBytesClonedOnCatchUpOnRecipient",
+ countBytesClonedOnCatchUpOnRecipient.load());
builder->append("countDocsClonedOnDonor", countDocsClonedOnDonor.load());
builder->append("countRecipientMoveChunkStarted", countRecipientMoveChunkStarted.load());
builder->append("countDocsDeletedOnDonor", countDocsDeletedOnDonor.load());
diff --git a/src/mongo/db/s/sharding_statistics.h b/src/mongo/db/s/sharding_statistics.h
index eb33aa8fd12..65b78877749 100644
--- a/src/mongo/db/s/sharding_statistics.h
+++ b/src/mongo/db/s/sharding_statistics.h
@@ -49,6 +49,12 @@ struct ShardingStatistics {
// (whether they succeeded or not).
AtomicWord<long long> countDonorMoveChunkStarted{0};
+ // Cumulative, always-increasing counter of how many move chunks this node successfully committed.
+ AtomicWord<long long> countDonorMoveChunkCommitted{0};
+
+ // Cumulative, always-increasing counter of how many move chunks this node aborted.
+ AtomicWord<long long> countDonorMoveChunkAborted{0};
+
// Cumulative, always-increasing counter of how much time the entire move chunk operation took
// (excluding range deletion).
AtomicWord<long long> totalDonorMoveChunkTimeMillis{0};
@@ -61,12 +67,24 @@ struct ShardingStatistics {
// recipient node.
AtomicWord<long long> countDocsClonedOnRecipient{0};
+ // Cumulative, always-increasing counter of how many documents have been cloned during the
+ // catch-up phase on the recipient node.
+ AtomicWord<long long> countDocsClonedOnCatchUpOnRecipient{0};
+
+ // Cumulative, always-increasing counter of how many bytes have been cloned during the
+ // catch-up phase on the recipient node.
+ AtomicWord<long long> countBytesClonedOnCatchUpOnRecipient{0};
+
+ // Cumulative, always-increasing counter of how many bytes have been cloned on the
+ // recipient node.
+ AtomicWord<long long> countBytesClonedOnRecipient{0};
+
// Cumulative, always-increasing counter of how many documents have been cloned on the donor
// node.
AtomicWord<long long> countDocsClonedOnDonor{0};
- // Cumulative, always-increasing counter of how many documents have been deleted on the donor
- // node by the rangeDeleter.
+ // Cumulative, always-increasing counter of how many documents have been deleted by the
+ // rangeDeleter.
AtomicWord<long long> countDocsDeletedOnDonor{0};
// Cumulative, always-increasing counter of how many chunks this node started to receive