diff options
author | Marcos José Grillo Ramirez <marcos.grillo@mongodb.com> | 2023-04-17 14:43:54 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-17 15:23:42 +0000 |
commit | 9fae6f63e917c33d6b4bee6d57446a3b6111b96a (patch) | |
tree | 8792faa75fd202cae18c740c25ce03e89a3e3159 /src/mongo/db | |
parent | 01beeb4c2895153914158b1b5691c8c8cd60356a (diff) | |
download | mongo-9fae6f63e917c33d6b4bee6d57446a3b6111b96a.tar.gz |
SERVER-72146 Add extra migration statistics, including the total bytes cloned before and during catch-up, the counts of committed and aborted migrations, and the total migration time
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/s/migration_batch_inserter.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/s/migration_destination_manager.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/s/migration_util.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_statistics.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_statistics.h | 22 |
5 files changed, 39 insertions, 2 deletions
diff --git a/src/mongo/db/s/migration_batch_inserter.cpp b/src/mongo/db/s/migration_batch_inserter.cpp index a73d2b52eec..37b2c0d07dd 100644 --- a/src/mongo/db/s/migration_batch_inserter.cpp +++ b/src/mongo/db/s/migration_batch_inserter.cpp @@ -165,6 +165,7 @@ void MigrationBatchInserter::run(Status status) const try { repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp()); ShardingStatistics::get(opCtx).countDocsClonedOnRecipient.addAndFetch(batchNumCloned); + ShardingStatistics::get(opCtx).countBytesClonedOnRecipient.addAndFetch(batchClonedBytes); LOGV2(6718408, "Incrementing numCloned count by {batchNumCloned} and numClonedBytes by " "{batchClonedBytes}", diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index 34467541e38..56a504eafed 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -1440,6 +1440,8 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, if (!_applyMigrateOp(opCtx, nextBatch)) { return true; } + ShardingStatistics::get(opCtx).countBytesClonedOnCatchUpOnRecipient.addAndFetch( + nextBatch["size"].number()); const int maxIterations = 3600 * 50; @@ -1695,6 +1697,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const BSONObj& xfer) { bool didAnything = false; long long changeInOrphans = 0; + long long totalDocs = 0; // Deleted documents if (xfer["deleted"].isABSONObj()) { @@ -1705,6 +1708,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const BSONObjIterator i(xfer["deleted"].Obj()); while (i.more()) { + totalDocs++; AutoGetCollection autoColl(opCtx, _nss, MODE_IX); uassert(ErrorCodes::ConflictingOperationInProgress, str::stream() << "Collection " << _nss.ns() @@ -1747,6 +1751,7 @@ bool 
MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const if (xfer["reload"].isABSONObj()) { BSONObjIterator i(xfer["reload"].Obj()); while (i.more()) { + totalDocs++; AutoGetCollection autoColl(opCtx, _nss, MODE_IX); uassert(ErrorCodes::ConflictingOperationInProgress, str::stream() << "Collection " << _nss.ns() @@ -1794,6 +1799,9 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const if (changeInOrphans != 0) { persistUpdatedNumOrphans(opCtx, *_collectionUuid, ChunkRange(_min, _max), changeInOrphans); } + + ShardingStatistics::get(opCtx).countDocsClonedOnCatchUpOnRecipient.addAndFetch(totalDocs); + return didAnything; } diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp index 6106752b5c5..2981c513ed6 100644 --- a/src/mongo/db/s/migration_util.cpp +++ b/src/mongo/db/s/migration_util.cpp @@ -814,6 +814,7 @@ void persistCommitDecision(OperationContext* opCtx, store.update(opCtx, BSON(MigrationCoordinatorDocument::kIdFieldName << migrationDoc.getId()), migrationDoc.toBSON()); + ShardingStatistics::get(opCtx).countDonorMoveChunkCommitted.addAndFetch(1); } catch (const ExceptionFor<ErrorCodes::NoMatchingDocument>&) { LOGV2_ERROR(6439800, "No coordination doc found on disk for migration", @@ -837,6 +838,7 @@ void persistAbortDecision(OperationContext* opCtx, store.update(opCtx, BSON(MigrationCoordinatorDocument::kIdFieldName << migrationDoc.getId()), migrationDoc.toBSON()); + ShardingStatistics::get(opCtx).countDonorMoveChunkAborted.addAndFetch(1); } catch (const ExceptionFor<ErrorCodes::NoMatchingDocument>&) { LOGV2(6439801, "No coordination doc found on disk for migration", diff --git a/src/mongo/db/s/sharding_statistics.cpp b/src/mongo/db/s/sharding_statistics.cpp index 4c3d54963a2..879d32ad991 100644 --- a/src/mongo/db/s/sharding_statistics.cpp +++ b/src/mongo/db/s/sharding_statistics.cpp @@ -54,6 +54,9 @@ void ShardingStatistics::report(BSONObjBuilder* builder) const { 
builder->append("countStaleConfigErrors", countStaleConfigErrors.load()); builder->append("countDonorMoveChunkStarted", countDonorMoveChunkStarted.load()); + builder->append("countDonorMoveChunkCommitted", countDonorMoveChunkCommitted.load()); + builder->append("countDonorMoveChunkAborted", countDonorMoveChunkAborted.load()); + builder->append("totalDonorMoveChunkTimeMillis", totalDonorMoveChunkTimeMillis.load()); builder->append("totalDonorChunkCloneTimeMillis", totalDonorChunkCloneTimeMillis.load()); builder->append("totalCriticalSectionCommitTimeMillis", totalCriticalSectionCommitTimeMillis.load()); @@ -61,6 +64,11 @@ void ShardingStatistics::report(BSONObjBuilder* builder) const { builder->append("totalRecipientCriticalSectionTimeMillis", totalRecipientCriticalSectionTimeMillis.load()); builder->append("countDocsClonedOnRecipient", countDocsClonedOnRecipient.load()); + builder->append("countBytesClonedOnRecipient", countBytesClonedOnRecipient.load()); + builder->append("countDocsClonedOnCatchUpOnRecipient", + countDocsClonedOnCatchUpOnRecipient.load()); + builder->append("countBytesClonedOnCatchUpOnRecipient", + countBytesClonedOnCatchUpOnRecipient.load()); builder->append("countDocsClonedOnDonor", countDocsClonedOnDonor.load()); builder->append("countRecipientMoveChunkStarted", countRecipientMoveChunkStarted.load()); builder->append("countDocsDeletedOnDonor", countDocsDeletedOnDonor.load()); diff --git a/src/mongo/db/s/sharding_statistics.h b/src/mongo/db/s/sharding_statistics.h index 91a10cc4308..7a7efeb1464 100644 --- a/src/mongo/db/s/sharding_statistics.h +++ b/src/mongo/db/s/sharding_statistics.h @@ -49,6 +49,12 @@ struct ShardingStatistics { // (whether they succeeded or not). AtomicWord<long long> countDonorMoveChunkStarted{0}; + // Cumulative, always-increasing counter of how many chunks this node successfully committed. 
+ AtomicWord<long long> countDonorMoveChunkCommitted{0}; + + // Cumulative, always-increasing counter of how many move chunks this node aborted. + AtomicWord<long long> countDonorMoveChunkAborted{0}; + // Cumulative, always-increasing counter of how much time the entire move chunk operation took // (excluding range deletion). AtomicWord<long long> totalDonorMoveChunkTimeMillis{0}; @@ -61,12 +67,24 @@ struct ShardingStatistics { // recipient node. AtomicWord<long long> countDocsClonedOnRecipient{0}; + // Cumulative, always-increasing counter of how many documents have been cloned on the catch up + // phase on the recipient node. + AtomicWord<long long> countDocsClonedOnCatchUpOnRecipient{0}; + + // Cumulative, always-increasing counter of how many bytes have been cloned on the catch up + // phase on the recipient node. + AtomicWord<long long> countBytesClonedOnCatchUpOnRecipient{0}; + + // Cumulative, always-increasing counter of how many bytes have been cloned on the + // recipient node. + AtomicWord<long long> countBytesClonedOnRecipient{0}; + // Cumulative, always-increasing counter of how many documents have been cloned on the donor // node. AtomicWord<long long> countDocsClonedOnDonor{0}; - // Cumulative, always-increasing counter of how many documents have been deleted on the donor - // node by the rangeDeleter. + // Cumulative, always-increasing counter of how many documents have been deleted by the + // rangeDeleter. AtomicWord<long long> countDocsDeletedOnDonor{0}; // Cumulative, always-increasing counter of how many chunks this node started to receive |