diff options
3 files changed, 22 insertions, 5 deletions
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp index fe15da0a5ca..9aa5ed7c223 100644 --- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp +++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp @@ -1399,6 +1399,14 @@ SemiFuture<void> ReshardingCoordinatorService::ReshardingCoordinator::run( .onCompletion([outerStatus](Status) { return outerStatus; }); }) .onCompletion([this, self = shared_from_this()](Status status) { + _metrics->onStateTransition(toMetricsState(_coordinatorDoc.getState()), boost::none); + + // Destroy metrics early so it's lifetime will not be tied to the lifetime of this + // state machine. This is because we have future callbacks copy shared pointers to this + // state machine that causes it to live longer than expected and potentially overlap + // with a newer instance when stepping up. + _metrics.reset(); + if (!status.isOK()) { { auto lg = stdx::lock_guard(_fulfillmentMutex); @@ -1412,8 +1420,6 @@ SemiFuture<void> ReshardingCoordinatorService::ReshardingCoordinator::run( } _reshardingCoordinatorObserver->interrupt(status); } - - _metrics->onStateTransition(toMetricsState(_coordinatorDoc.getState()), boost::none); }) .semi(); } diff --git a/src/mongo/db/s/resharding/resharding_donor_service.cpp b/src/mongo/db/s/resharding/resharding_donor_service.cpp index b25b25185ce..40b1f17f179 100644 --- a/src/mongo/db/s/resharding/resharding_donor_service.cpp +++ b/src/mongo/db/s/resharding/resharding_donor_service.cpp @@ -414,6 +414,14 @@ ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::_finishReshardin Status ReshardingDonorService::DonorStateMachine::_runMandatoryCleanup( Status status, const CancellationToken& stepdownToken) { + _metrics->onStateTransition(toMetricsState(_donorCtx.getState()), boost::none); + + // Destroy metrics early so it's lifetime will not be tied to the lifetime of this state + // machine. This is because we have future callbacks copy shared pointers to this state machine + // that causes it to live longer than expected and potentially overlap with a newer instance + // when stepping up. + _metrics.reset(); + if (!status.isOK()) { // If the stepdownToken was triggered, it takes priority in order to make sure that // the promise is set with an error that can be retried with. If it ran into an @@ -431,8 +439,6 @@ Status ReshardingDonorService::DonorStateMachine::_runMandatoryCleanup( ensureFulfilledPromise(lk, _completionPromise, statusForPromise); } - _metrics->onStateTransition(toMetricsState(_donorCtx.getState()), boost::none); - return status; } diff --git a/src/mongo/db/s/resharding/resharding_recipient_service.cpp b/src/mongo/db/s/resharding/resharding_recipient_service.cpp index f52e3b8a3a7..5b66d19e8bd 100644 --- a/src/mongo/db/s/resharding/resharding_recipient_service.cpp +++ b/src/mongo/db/s/resharding/resharding_recipient_service.cpp @@ -421,6 +421,12 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::_runMand isCanceled = stepdownToken.isCanceled()](Status dataReplicationHaltStatus) { _metrics->onStateTransition(toMetricsState(_recipientCtx.getState()), boost::none); + // Destroy metrics early so it's lifetime will not be tied to the lifetime of this + // state machine. This is because we have future callbacks copy shared pointers to this + // state machine that causes it to live longer than expected and potentially overlap + // with a newer instance when stepping up. + _metrics.reset(); + // If the stepdownToken was triggered, it takes priority in order to make sure that // the promise is set with an error that the coordinator can retry with. If it ran into // an unrecoverable error, it would have fasserted earlier. @@ -434,7 +440,6 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::_runMand // replication errors because resharding is known to have failed already. stdx::lock_guard<Latch> lk(_mutex); ensureFulfilledPromise(lk, _completionPromise, outerStatus); - return outerStatus; }); } |