diff options
author | Marcos José Grillo Ramirez <marcos.grillo@mongodb.com> | 2023-04-17 14:43:54 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-19 19:12:24 +0000 |
commit | c7b8de3848fa6703599085541f9c7af6cf959288 (patch) | |
tree | dfef61839f993ff7cee9af3b2fbeb4408c76570c | |
parent | 8856ae5c99f5db9acf73291f8d0085a6a3f214dd (diff) | |
download | mongo-c7b8de3848fa6703599085541f9c7af6cf959288.tar.gz |
SERVER-72146 Add extra migration statistics, including the total bytes cloned before and during the catch-up phase, the number of committed and aborted migrations, and the total migration time
(cherry picked from commit 9fae6f63e917c33d6b4bee6d57446a3b6111b96a)
-rw-r--r-- | src/mongo/db/s/migration_batch_inserter.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/s/migration_destination_manager.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/s/migration_util.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_statistics.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_statistics.h | 22 |
5 files changed, 39 insertions, 2 deletions
diff --git a/src/mongo/db/s/migration_batch_inserter.cpp b/src/mongo/db/s/migration_batch_inserter.cpp index 30abce5c3a9..ac0a59126a0 100644 --- a/src/mongo/db/s/migration_batch_inserter.cpp +++ b/src/mongo/db/s/migration_batch_inserter.cpp @@ -163,6 +163,7 @@ void MigrationBatchInserter::run(Status status) const try { repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp()); ShardingStatistics::get(opCtx).countDocsClonedOnRecipient.addAndFetch(batchNumCloned); + ShardingStatistics::get(opCtx).countBytesClonedOnRecipient.addAndFetch(batchClonedBytes); LOGV2(6718408, "Incrementing numCloned count by {batchNumCloned} and numClonedBytes by " "{batchClonedBytes}", diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index 2f5230b1702..422d62346a0 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -1407,6 +1407,8 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, if (!_applyMigrateOp(opCtx, nextBatch)) { return true; } + ShardingStatistics::get(opCtx).countBytesClonedOnCatchUpOnRecipient.addAndFetch( + nextBatch["size"].number()); const int maxIterations = 3600 * 50; @@ -1671,6 +1673,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const BSONObj& xfer) { bool didAnything = false; long long changeInOrphans = 0; + long long totalDocs = 0; // Deleted documents if (xfer["deleted"].isABSONObj()) { @@ -1681,6 +1684,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const BSONObjIterator i(xfer["deleted"].Obj()); while (i.more()) { + totalDocs++; AutoGetCollection autoColl(opCtx, _nss, MODE_IX); uassert(ErrorCodes::ConflictingOperationInProgress, str::stream() << "Collection " << _nss.ns() @@ -1723,6 +1727,7 @@ bool 
MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const if (xfer["reload"].isABSONObj()) { BSONObjIterator i(xfer["reload"].Obj()); while (i.more()) { + totalDocs++; AutoGetCollection autoColl(opCtx, _nss, MODE_IX); uassert(ErrorCodes::ConflictingOperationInProgress, str::stream() << "Collection " << _nss.ns() @@ -1777,6 +1782,9 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const migrationutil::persistUpdatedNumOrphans( opCtx, _migrationId.get(), *_collectionUuid, changeInOrphans); } + + ShardingStatistics::get(opCtx).countDocsClonedOnCatchUpOnRecipient.addAndFetch(totalDocs); + return didAnything; } diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp index 2165c92e778..4484c4b4137 100644 --- a/src/mongo/db/s/migration_util.cpp +++ b/src/mongo/db/s/migration_util.cpp @@ -844,6 +844,7 @@ void persistCommitDecision(OperationContext* opCtx, store.upsert(opCtx, BSON(MigrationCoordinatorDocument::kIdFieldName << migrationDoc.getId()), migrationDoc.toBSON()); + ShardingStatistics::get(opCtx).countDonorMoveChunkCommitted.addAndFetch(1); if (hangInPersistMigrateCommitDecisionThenSimulateErrorUninterruptible.shouldFail()) { hangInPersistMigrateCommitDecisionThenSimulateErrorUninterruptible.pauseWhileSet(opCtx); @@ -863,6 +864,7 @@ void persistAbortDecision(OperationContext* opCtx, store.upsert(opCtx, BSON(MigrationCoordinatorDocument::kIdFieldName << migrationDoc.getId()), migrationDoc.toBSON()); + ShardingStatistics::get(opCtx).countDonorMoveChunkAborted.addAndFetch(1); if (hangInPersistMigrateAbortDecisionThenSimulateErrorUninterruptible.shouldFail()) { hangInPersistMigrateAbortDecisionThenSimulateErrorUninterruptible.pauseWhileSet(opCtx); diff --git a/src/mongo/db/s/sharding_statistics.cpp b/src/mongo/db/s/sharding_statistics.cpp index ea17a500202..d103ad001fa 100644 --- a/src/mongo/db/s/sharding_statistics.cpp +++ b/src/mongo/db/s/sharding_statistics.cpp @@ -55,6 +55,9 @@ void 
ShardingStatistics::report(BSONObjBuilder* builder) const { builder->append("countStaleConfigErrors", countStaleConfigErrors.load()); builder->append("countDonorMoveChunkStarted", countDonorMoveChunkStarted.load()); + builder->append("countDonorMoveChunkCommitted", countDonorMoveChunkCommitted.load()); + builder->append("countDonorMoveChunkAborted", countDonorMoveChunkAborted.load()); + builder->append("totalDonorMoveChunkTimeMillis", totalDonorMoveChunkTimeMillis.load()); builder->append("totalDonorChunkCloneTimeMillis", totalDonorChunkCloneTimeMillis.load()); builder->append("totalCriticalSectionCommitTimeMillis", totalCriticalSectionCommitTimeMillis.load()); @@ -62,6 +65,11 @@ void ShardingStatistics::report(BSONObjBuilder* builder) const { builder->append("totalRecipientCriticalSectionTimeMillis", totalRecipientCriticalSectionTimeMillis.load()); builder->append("countDocsClonedOnRecipient", countDocsClonedOnRecipient.load()); + builder->append("countBytesClonedOnRecipient", countBytesClonedOnRecipient.load()); + builder->append("countDocsClonedOnCatchUpOnRecipient", + countDocsClonedOnCatchUpOnRecipient.load()); + builder->append("countBytesClonedOnCatchUpOnRecipient", + countBytesClonedOnCatchUpOnRecipient.load()); builder->append("countDocsClonedOnDonor", countDocsClonedOnDonor.load()); builder->append("countRecipientMoveChunkStarted", countRecipientMoveChunkStarted.load()); builder->append("countDocsDeletedOnDonor", countDocsDeletedOnDonor.load()); diff --git a/src/mongo/db/s/sharding_statistics.h b/src/mongo/db/s/sharding_statistics.h index 91a10cc4308..7a7efeb1464 100644 --- a/src/mongo/db/s/sharding_statistics.h +++ b/src/mongo/db/s/sharding_statistics.h @@ -49,6 +49,12 @@ struct ShardingStatistics { // (whether they succeeded or not). AtomicWord<long long> countDonorMoveChunkStarted{0}; + // Cumulative, always-increasing counter of how many chunks this node successfully committed. 
+ AtomicWord<long long> countDonorMoveChunkCommitted{0}; + + // Cumulative, always-increasing counter of how many move chunks this node aborted. + AtomicWord<long long> countDonorMoveChunkAborted{0}; + // Cumulative, always-increasing counter of how much time the entire move chunk operation took // (excluding range deletion). AtomicWord<long long> totalDonorMoveChunkTimeMillis{0}; @@ -61,12 +67,24 @@ struct ShardingStatistics { // recipient node. AtomicWord<long long> countDocsClonedOnRecipient{0}; + // Cumulative, always-increasing counter of how many documents have been cloned on the catch up + // phase on the recipient node. + AtomicWord<long long> countDocsClonedOnCatchUpOnRecipient{0}; + + // Cumulative, always-increasing counter of how many bytes have been cloned on the catch up + // phase on the recipient node. + AtomicWord<long long> countBytesClonedOnCatchUpOnRecipient{0}; + + // Cumulative, always-increasing counter of how many bytes have been cloned on the + // recipient node. + AtomicWord<long long> countBytesClonedOnRecipient{0}; + // Cumulative, always-increasing counter of how many documents have been cloned on the donor // node. AtomicWord<long long> countDocsClonedOnDonor{0}; - // Cumulative, always-increasing counter of how many documents have been deleted on the donor - // node by the rangeDeleter. + // Cumulative, always-increasing counter of how many documents have been deleted by the + // rangeDeleter. AtomicWord<long long> countDocsDeletedOnDonor{0}; // Cumulative, always-increasing counter of how many chunks this node started to receive |