author    Marcos José Grillo Ramirez <marcos.grillo@mongodb.com>  2023-04-17 14:43:54 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>        2023-04-19 16:45:45 +0000
commit    90b7607c7a076311704a040fb25f7e5f5079ec15
tree      c23f4987686faa546e6b97ca0a456d11437a38eb
parent    e41a5e35f434e4c262f55f7da8d22b218d8df777
download  mongo-90b7607c7a076311704a040fb25f7e5f5079ec15.tar.gz
SERVER-72146 Add extra migration statistics, including the total bytes cloned before and during catch-up, the number of committed and aborted migrations, and the total migration time
(cherry picked from commit 9fae6f63e917c33d6b4bee6d57446a3b6111b96a)
-rw-r--r--  src/mongo/db/s/migration_batch_inserter.cpp        1
-rw-r--r--  src/mongo/db/s/migration_destination_manager.cpp   8
-rw-r--r--  src/mongo/db/s/migration_util.cpp                  2
-rw-r--r--  src/mongo/db/s/sharding_statistics.cpp             8
-rw-r--r--  src/mongo/db/s/sharding_statistics.h              22
5 files changed, 39 insertions, 2 deletions
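
Every change in this patch follows the same pattern: bump an AtomicWord<long long> counter at the point where the work happens, and snapshot it later in ShardingStatistics::report(). As a rough mental model (this is a stand-in, not MongoDB's actual class), AtomicWord<long long> behaves like std::atomic<long long>: addAndFetch maps onto fetch_add plus the delta, and load onto load. A minimal self-contained sketch of the pattern:

    #include <atomic>
    #include <iostream>

    // Stand-in for mongo's AtomicWord<long long>; addAndFetch returns the
    // post-increment value, matching the real class's semantics.
    struct Counter {
        std::atomic<long long> value{0};
        long long addAndFetch(long long delta) { return value.fetch_add(delta) + delta; }
        long long load() const { return value.load(); }
    };

    int main() {
        Counter countBytesClonedOnRecipient;            // one of the new counters
        countBytesClonedOnRecipient.addAndFetch(4096);  // e.g. after inserting a batch
        std::cout << countBytesClonedOnRecipient.load() << "\n";  // prints 4096
    }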
diff --git a/src/mongo/db/s/migration_batch_inserter.cpp b/src/mongo/db/s/migration_batch_inserter.cpp
index a73d2b52eec..37b2c0d07dd 100644
--- a/src/mongo/db/s/migration_batch_inserter.cpp
+++ b/src/mongo/db/s/migration_batch_inserter.cpp
@@ -165,6 +165,7 @@ void MigrationBatchInserter::run(Status status) const try {
repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp());
ShardingStatistics::get(opCtx).countDocsClonedOnRecipient.addAndFetch(batchNumCloned);
+ ShardingStatistics::get(opCtx).countBytesClonedOnRecipient.addAndFetch(batchClonedBytes);
LOGV2(6718408,
"Incrementing numCloned count by {batchNumCloned} and numClonedBytes by "
"{batchClonedBytes}",
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index 4c384e7b2a4..c075a88c5da 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -1438,6 +1438,8 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx,
if (!_applyMigrateOp(opCtx, nextBatch)) {
return true;
}
+ ShardingStatistics::get(opCtx).countBytesClonedOnCatchUpOnRecipient.addAndFetch(
+ nextBatch["size"].number());
const int maxIterations = 3600 * 50;
@@ -1693,6 +1695,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx,
bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const BSONObj& xfer) {
bool didAnything = false;
long long changeInOrphans = 0;
+ long long totalDocs = 0;
// Deleted documents
if (xfer["deleted"].isABSONObj()) {
@@ -1703,6 +1706,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const
BSONObjIterator i(xfer["deleted"].Obj());
while (i.more()) {
+ totalDocs++;
AutoGetCollection autoColl(opCtx, _nss, MODE_IX);
uassert(ErrorCodes::ConflictingOperationInProgress,
str::stream() << "Collection " << _nss.ns()
@@ -1745,6 +1749,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const
if (xfer["reload"].isABSONObj()) {
BSONObjIterator i(xfer["reload"].Obj());
while (i.more()) {
+ totalDocs++;
AutoGetCollection autoColl(opCtx, _nss, MODE_IX);
uassert(ErrorCodes::ConflictingOperationInProgress,
str::stream() << "Collection " << _nss.ns()
@@ -1792,6 +1797,9 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const
if (changeInOrphans != 0) {
persistUpdatedNumOrphans(opCtx, *_collectionUuid, ChunkRange(_min, _max), changeInOrphans);
}
+
+ ShardingStatistics::get(opCtx).countDocsClonedOnCatchUpOnRecipient.addAndFetch(totalDocs);
+
return didAnything;
}
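
Catch-up accounting is split across two sites: _migrateDriver adds each batch's "size" field to the byte counter, while _applyMigrateOp counts every document it touches, whether it came from the transfer's "deleted" list or its "reload" list. A self-contained sketch of the document-counting half, with vectors standing in for the two BSON arrays:

    #include <atomic>
    #include <string>
    #include <vector>

    std::atomic<long long> countDocsClonedOnCatchUpOnRecipient{0};

    // Hypothetical stand-in for the "xfer" BSON object: ids to delete and
    // documents to (re)insert during catch-up.
    struct Xfer {
        std::vector<std::string> deleted;
        std::vector<std::string> reload;
    };

    // Mirrors _applyMigrateOp's tallying: totalDocs is incremented once per
    // entry in either list, then folded into the cumulative counter at the end.
    void applyMigrateOp(const Xfer& xfer) {
        long long totalDocs = 0;
        for (const auto& id : xfer.deleted) {
            ++totalDocs;
            (void)id;  // real code deletes the document here
        }
        for (const auto& doc : xfer.reload) {
            ++totalDocs;
            (void)doc;  // real code upserts the document here
        }
        countDocsClonedOnCatchUpOnRecipient.fetch_add(totalDocs);
    }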
diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp
index 5f6cd2253e6..1d6690fc571 100644
--- a/src/mongo/db/s/migration_util.cpp
+++ b/src/mongo/db/s/migration_util.cpp
@@ -797,6 +797,7 @@ void persistCommitDecision(OperationContext* opCtx,
store.update(opCtx,
BSON(MigrationCoordinatorDocument::kIdFieldName << migrationDoc.getId()),
migrationDoc.toBSON());
+ ShardingStatistics::get(opCtx).countDonorMoveChunkCommitted.addAndFetch(1);
} catch (const ExceptionFor<ErrorCodes::NoMatchingDocument>&) {
LOGV2_ERROR(6439800,
"No coordination doc found on disk for migration",
@@ -820,6 +821,7 @@ void persistAbortDecision(OperationContext* opCtx,
store.update(opCtx,
BSON(MigrationCoordinatorDocument::kIdFieldName << migrationDoc.getId()),
migrationDoc.toBSON());
+ ShardingStatistics::get(opCtx).countDonorMoveChunkAborted.addAndFetch(1);
} catch (const ExceptionFor<ErrorCodes::NoMatchingDocument>&) {
LOGV2(6439801,
"No coordination doc found on disk for migration",
diff --git a/src/mongo/db/s/sharding_statistics.cpp b/src/mongo/db/s/sharding_statistics.cpp
index 5f328c17e4a..f56ea543378 100644
--- a/src/mongo/db/s/sharding_statistics.cpp
+++ b/src/mongo/db/s/sharding_statistics.cpp
@@ -54,6 +54,9 @@ void ShardingStatistics::report(BSONObjBuilder* builder) const {
builder->append("countStaleConfigErrors", countStaleConfigErrors.load());
builder->append("countDonorMoveChunkStarted", countDonorMoveChunkStarted.load());
+ builder->append("countDonorMoveChunkCommitted", countDonorMoveChunkCommitted.load());
+ builder->append("countDonorMoveChunkAborted", countDonorMoveChunkAborted.load());
+ builder->append("totalDonorMoveChunkTimeMillis", totalDonorMoveChunkTimeMillis.load());
builder->append("totalDonorChunkCloneTimeMillis", totalDonorChunkCloneTimeMillis.load());
builder->append("totalCriticalSectionCommitTimeMillis",
totalCriticalSectionCommitTimeMillis.load());
@@ -61,6 +64,11 @@ void ShardingStatistics::report(BSONObjBuilder* builder) const {
builder->append("totalRecipientCriticalSectionTimeMillis",
totalRecipientCriticalSectionTimeMillis.load());
builder->append("countDocsClonedOnRecipient", countDocsClonedOnRecipient.load());
+ builder->append("countBytesClonedOnRecipient", countBytesClonedOnRecipient.load());
+ builder->append("countDocsClonedOnCatchUpOnRecipient",
+ countDocsClonedOnCatchUpOnRecipient.load());
+ builder->append("countBytesClonedOnCatchUpOnRecipient",
+ countBytesClonedOnCatchUpOnRecipient.load());
builder->append("countDocsClonedOnDonor", countDocsClonedOnDonor.load());
builder->append("countRecipientMoveChunkStarted", countRecipientMoveChunkStarted.load());
builder->append("countDocsDeletedOnDonor", countDocsDeletedOnDonor.load());
diff --git a/src/mongo/db/s/sharding_statistics.h b/src/mongo/db/s/sharding_statistics.h
index 91a10cc4308..7a7efeb1464 100644
--- a/src/mongo/db/s/sharding_statistics.h
+++ b/src/mongo/db/s/sharding_statistics.h
@@ -49,6 +49,12 @@ struct ShardingStatistics {
// (whether they succeeded or not).
AtomicWord<long long> countDonorMoveChunkStarted{0};
+ // Cumulative, always-increasing counter of how many move chunks this node successfully committed.
+ AtomicWord<long long> countDonorMoveChunkCommitted{0};
+
+ // Cumulative, always-increasing counter of how many move chunks this node aborted.
+ AtomicWord<long long> countDonorMoveChunkAborted{0};
+
// Cumulative, always-increasing counter of how much time the entire move chunk operation took
// (excluding range deletion).
AtomicWord<long long> totalDonorMoveChunkTimeMillis{0};
@@ -61,12 +67,24 @@ struct ShardingStatistics {
// recipient node.
AtomicWord<long long> countDocsClonedOnRecipient{0};
+ // Cumulative, always-increasing counter of how many documents have been cloned during the
+ // catch-up phase on the recipient node.
+ AtomicWord<long long> countDocsClonedOnCatchUpOnRecipient{0};
+
+ // Cumulative, always-increasing counter of how many bytes have been cloned during the
+ // catch-up phase on the recipient node.
+ AtomicWord<long long> countBytesClonedOnCatchUpOnRecipient{0};
+
+ // Cumulative, always-increasing counter of how many bytes have been cloned on the
+ // recipient node.
+ AtomicWord<long long> countBytesClonedOnRecipient{0};
+
// Cumulative, always-increasing counter of how many documents have been cloned on the donor
// node.
AtomicWord<long long> countDocsClonedOnDonor{0};
- // Cumulative, always-increasing counter of how many documents have been deleted on the donor
- // node by the rangeDeleter.
+ // Cumulative, always-increasing counter of how many documents have been deleted by the
+ // rangeDeleter.
AtomicWord<long long> countDocsDeletedOnDonor{0};
// Cumulative, always-increasing counter of how many chunks this node started to receive
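
Once the patch is in place, the new fields appear alongside the existing ones in the shardingStatistics section of serverStatus on shard members. A hedged mongocxx sketch of reading them back (assumes a locally reachable mongod that is part of a sharded cluster; the connection address is a placeholder):

    #include <bsoncxx/builder/basic/document.hpp>
    #include <bsoncxx/json.hpp>
    #include <iostream>
    #include <mongocxx/client.hpp>
    #include <mongocxx/instance.hpp>
    #include <mongocxx/uri.hpp>

    int main() {
        using bsoncxx::builder::basic::kvp;
        using bsoncxx::builder::basic::make_document;

        mongocxx::instance inst{};  // one driver instance per process
        mongocxx::client client{mongocxx::uri{"mongodb://localhost:27018"}};  // placeholder shard address

        auto reply = client["admin"].run_command(make_document(kvp("serverStatus", 1)));
        auto stats = reply.view()["shardingStatistics"];  // present on shard members
        if (stats && stats.type() == bsoncxx::type::k_document) {
            // Includes the counters added here, e.g. countDonorMoveChunkCommitted,
            // countBytesClonedOnRecipient, countDocsClonedOnCatchUpOnRecipient.
            std::cout << bsoncxx::to_json(stats.get_document().value) << std::endl;
        }
    }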