diff options
author | Marcos José Grillo Ramirez <marcos.grillo@mongodb.com> | 2023-04-17 14:43:54 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-19 20:00:07 +0000 |
commit | 98fc8a3bb804407cf61b4ee82b0c3bd55a16b950 (patch) | |
tree | ae4523153c98197fa7a46cc374a402c1e39aa0cc | |
parent | 300be016afdf9be6637cb00cdffb104aa3330f68 (diff) | |
download | mongo-98fc8a3bb804407cf61b4ee82b0c3bd55a16b950.tar.gz |
SERVER-72146 Add extra migration statistics, including the total bytes cloned before and during catch-up, the counts of committed and aborted migrations, and the total migration time
(cherry picked from commit 9fae6f63e917c33d6b4bee6d57446a3b6111b96a)
(cherry picked from commit c7b8de3848fa6703599085541f9c7af6cf959288)
-rw-r--r-- | src/mongo/db/s/migration_batch_inserter.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/s/migration_destination_manager.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/s/migration_util.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_statistics.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_statistics.h | 22 |
5 files changed, 38 insertions, 2 deletions
diff --git a/src/mongo/db/s/migration_batch_inserter.cpp b/src/mongo/db/s/migration_batch_inserter.cpp index d1df6c1a1b8..7197538d9c2 100644 --- a/src/mongo/db/s/migration_batch_inserter.cpp +++ b/src/mongo/db/s/migration_batch_inserter.cpp @@ -160,6 +160,7 @@ void MigrationBatchInserter::run(Status status) const try { repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp()); ShardingStatistics::get(opCtx).countDocsClonedOnRecipient.addAndFetch(batchNumCloned); + ShardingStatistics::get(opCtx).countBytesClonedOnRecipient.addAndFetch(batchClonedBytes); LOGV2(6718408, "Incrementing numCloned count by {batchNumCloned} and numClonedBytes by " "{batchClonedBytes}", diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index 266f707dd55..8558a33c3fd 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -1172,6 +1172,8 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { if (!_applyMigrateOp(opCtx, mods, &lastOpApplied)) { continue; } + ShardingStatistics::get(opCtx).countBytesClonedOnCatchUpOnRecipient.addAndFetch( + mods["size"].number()); const int maxIterations = 3600 * 50; @@ -1326,6 +1328,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, invariant(lastOpApplied); bool didAnything = false; + long long totalDocs = 0; DisableDocumentValidation documentValidationDisabler( opCtx, @@ -1341,6 +1344,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, BSONObjIterator i(xfer["deleted"].Obj()); while (i.more()) { + totalDocs++; AutoGetCollection autoColl(opCtx, _nss, MODE_IX); uassert(ErrorCodes::ConflictingOperationInProgress, str::stream() << "Collection " << _nss.ns() @@ -1383,6 +1387,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, if (xfer["reload"].isABSONObj()) { BSONObjIterator i(xfer["reload"].Obj()); while (i.more()) { 
+ totalDocs++; AutoGetCollection autoColl(opCtx, _nss, MODE_IX); uassert(ErrorCodes::ConflictingOperationInProgress, str::stream() << "Collection " << _nss.ns() @@ -1431,6 +1436,8 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, } } + ShardingStatistics::get(opCtx).countDocsClonedOnCatchUpOnRecipient.addAndFetch(totalDocs); + return didAnything; } diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp index 531688d5a6b..40e721c42a6 100644 --- a/src/mongo/db/s/migration_util.cpp +++ b/src/mongo/db/s/migration_util.cpp @@ -674,6 +674,7 @@ void persistCommitDecision(OperationContext* opCtx, store.upsert(opCtx, QUERY(MigrationCoordinatorDocument::kIdFieldName << migrationDoc.getId()), migrationDoc.toBSON()); + ShardingStatistics::get(opCtx).countDonorMoveChunkCommitted.addAndFetch(1); if (hangInPersistMigrateCommitDecisionThenSimulateErrorUninterruptible.shouldFail()) { hangInPersistMigrateCommitDecisionThenSimulateErrorUninterruptible.pauseWhileSet(opCtx); @@ -693,6 +694,7 @@ void persistAbortDecision(OperationContext* opCtx, store.upsert(opCtx, QUERY(MigrationCoordinatorDocument::kIdFieldName << migrationDoc.getId()), migrationDoc.toBSON()); + ShardingStatistics::get(opCtx).countDonorMoveChunkAborted.addAndFetch(1); if (hangInPersistMigrateAbortDecisionThenSimulateErrorUninterruptible.shouldFail()) { hangInPersistMigrateAbortDecisionThenSimulateErrorUninterruptible.pauseWhileSet(opCtx); diff --git a/src/mongo/db/s/sharding_statistics.cpp b/src/mongo/db/s/sharding_statistics.cpp index fd201333860..4c88d479e77 100644 --- a/src/mongo/db/s/sharding_statistics.cpp +++ b/src/mongo/db/s/sharding_statistics.cpp @@ -55,11 +55,19 @@ void ShardingStatistics::report(BSONObjBuilder* builder) const { builder->append("countStaleConfigErrors", countStaleConfigErrors.load()); builder->append("countDonorMoveChunkStarted", countDonorMoveChunkStarted.load()); + builder->append("countDonorMoveChunkCommitted", 
countDonorMoveChunkCommitted.load()); + builder->append("countDonorMoveChunkAborted", countDonorMoveChunkAborted.load()); + builder->append("totalDonorMoveChunkTimeMillis", totalDonorMoveChunkTimeMillis.load()); builder->append("totalDonorChunkCloneTimeMillis", totalDonorChunkCloneTimeMillis.load()); builder->append("totalCriticalSectionCommitTimeMillis", totalCriticalSectionCommitTimeMillis.load()); builder->append("totalCriticalSectionTimeMillis", totalCriticalSectionTimeMillis.load()); builder->append("countDocsClonedOnRecipient", countDocsClonedOnRecipient.load()); + builder->append("countBytesClonedOnRecipient", countBytesClonedOnRecipient.load()); + builder->append("countDocsClonedOnCatchUpOnRecipient", + countDocsClonedOnCatchUpOnRecipient.load()); + builder->append("countBytesClonedOnCatchUpOnRecipient", + countBytesClonedOnCatchUpOnRecipient.load()); builder->append("countDocsClonedOnDonor", countDocsClonedOnDonor.load()); builder->append("countRecipientMoveChunkStarted", countRecipientMoveChunkStarted.load()); builder->append("countDocsDeletedOnDonor", countDocsDeletedOnDonor.load()); diff --git a/src/mongo/db/s/sharding_statistics.h b/src/mongo/db/s/sharding_statistics.h index eb33aa8fd12..65b78877749 100644 --- a/src/mongo/db/s/sharding_statistics.h +++ b/src/mongo/db/s/sharding_statistics.h @@ -49,6 +49,12 @@ struct ShardingStatistics { // (whether they succeeded or not). AtomicWord<long long> countDonorMoveChunkStarted{0}; + // Cumulative, always-increasing counter of how many chunks this node successfully committed. + AtomicWord<long long> countDonorMoveChunkCommitted{0}; + + // Cumulative, always-increasing counter of how many move chunks this node aborted. + AtomicWord<long long> countDonorMoveChunkAborted{0}; + // Cumulative, always-increasing counter of how much time the entire move chunk operation took // (excluding range deletion). 
AtomicWord<long long> totalDonorMoveChunkTimeMillis{0}; @@ -61,12 +67,24 @@ struct ShardingStatistics { // recipient node. AtomicWord<long long> countDocsClonedOnRecipient{0}; + // Cumulative, always-increasing counter of how many documents have been cloned on the catch up + // phase on the recipient node. + AtomicWord<long long> countDocsClonedOnCatchUpOnRecipient{0}; + + // Cumulative, always-increasing counter of how many bytes have been cloned on the catch up + // phase on the recipient node. + AtomicWord<long long> countBytesClonedOnCatchUpOnRecipient{0}; + + // Cumulative, always-increasing counter of how many bytes have been cloned on the + // recipient node. + AtomicWord<long long> countBytesClonedOnRecipient{0}; + // Cumulative, always-increasing counter of how many documents have been cloned on the donor // node. AtomicWord<long long> countDocsClonedOnDonor{0}; - // Cumulative, always-increasing counter of how many documents have been deleted on the donor - // node by the rangeDeleter. + // Cumulative, always-increasing counter of how many documents have been deleted by the + // rangeDeleter. AtomicWord<long long> countDocsDeletedOnDonor{0}; // Cumulative, always-increasing counter of how many chunks this node started to receive |