author     Randolph Tan <randolph@10gen.com>  2022-06-23 19:16:24 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2022-06-23 20:10:18 +0000
commit     1b1d8538d0dab776ecccadcecc64f6d7eaebb5e7 (patch)
tree       444f1a074eca671513dbd005484093db6ea9c337
parent     7ad5fbf86418d2a6f17d553b5888198c5f9de05a (diff)
SERVER-67370 Destroy instance metrics object early so its lifetime will not be tied to the future callback function object
-rw-r--r--  src/mongo/db/s/resharding/resharding_coordinator_service.cpp  10
-rw-r--r--  src/mongo/db/s/resharding/resharding_donor_service.cpp        10
-rw-r--r--  src/mongo/db/s/resharding/resharding_recipient_service.cpp     7
3 files changed, 22 insertions, 5 deletions
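The change is the same in all three resharding state machines: record the final metrics state transition, then destroy the metrics object immediately, instead of letting it die with the state machine. The future continuations capture `self = shared_from_this()`, so the executor's copy of the callback keeps the state machine (and, before this patch, its metrics) alive past logical completion. Below is a minimal sketch of that pattern; `Metrics`, `StateMachine`, and the `std::function` standing in for the stored future callback are hypothetical illustrations, not the actual MongoDB classes:

    #include <functional>
    #include <iostream>
    #include <memory>

    struct Metrics {
        void onStateTransition() { std::cout << "final state recorded\n"; }
        ~Metrics() { std::cout << "metrics destroyed\n"; }
    };

    class StateMachine : public std::enable_shared_from_this<StateMachine> {
    public:
        // Returns the completion callback; in the real service this is a future
        // continuation owned by an executor for an unpredictable amount of time.
        std::function<void()> makeCompletionCallback() {
            return [this, self = shared_from_this()] {
                _metrics->onStateTransition();  // record the final transition first
                _metrics.reset();               // then free metrics eagerly, so its
                                                // lifetime is not tied to the callback
            };
        }

    private:
        std::unique_ptr<Metrics> _metrics = std::make_unique<Metrics>();
    };

    int main() {
        auto sm = std::make_shared<StateMachine>();
        auto cb = sm->makeCompletionCallback();
        sm.reset();  // caller drops its reference; `cb` still holds one
        cb();        // metrics is destroyed here, even though the StateMachine
                     // itself stays alive until `cb` is destroyed
    }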
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
index fe15da0a5ca..9aa5ed7c223 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
@@ -1399,6 +1399,14 @@ SemiFuture<void> ReshardingCoordinatorService::ReshardingCoordinator::run(
.onCompletion([outerStatus](Status) { return outerStatus; });
})
.onCompletion([this, self = shared_from_this()](Status status) {
+ _metrics->onStateTransition(toMetricsState(_coordinatorDoc.getState()), boost::none);
+
+ // Destroy metrics early so its lifetime will not be tied to the lifetime of this
+ // state machine. Future callbacks copy shared pointers to this state machine,
+ // causing it to live longer than expected and potentially overlap with a newer
+ // instance when stepping up.
+ _metrics.reset();
+
if (!status.isOK()) {
{
auto lg = stdx::lock_guard(_fulfillmentMutex);
@@ -1412,8 +1420,6 @@ SemiFuture<void> ReshardingCoordinatorService::ReshardingCoordinator::run(
}
_reshardingCoordinatorObserver->interrupt(status);
}
-
- _metrics->onStateTransition(toMetricsState(_coordinatorDoc.getState()), boost::none);
})
.semi();
}
diff --git a/src/mongo/db/s/resharding/resharding_donor_service.cpp b/src/mongo/db/s/resharding/resharding_donor_service.cpp
index b25b25185ce..40b1f17f179 100644
--- a/src/mongo/db/s/resharding/resharding_donor_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_service.cpp
@@ -414,6 +414,14 @@ ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::_finishReshardin
Status ReshardingDonorService::DonorStateMachine::_runMandatoryCleanup(
Status status, const CancellationToken& stepdownToken) {
+ _metrics->onStateTransition(toMetricsState(_donorCtx.getState()), boost::none);
+
+ // Destroy metrics early so its lifetime will not be tied to the lifetime of this state
+ // machine. Future callbacks copy shared pointers to this state machine, causing it to
+ // live longer than expected and potentially overlap with a newer instance when stepping
+ // up.
+ _metrics.reset();
+
if (!status.isOK()) {
// If the stepdownToken was triggered, it takes priority in order to make sure that
// the promise is set with an error that can be retried with. If it ran into an
@@ -431,8 +439,6 @@ Status ReshardingDonorService::DonorStateMachine::_runMandatoryCleanup(
ensureFulfilledPromise(lk, _completionPromise, statusForPromise);
}
- _metrics->onStateTransition(toMetricsState(_donorCtx.getState()), boost::none);
-
return status;
}
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service.cpp b/src/mongo/db/s/resharding/resharding_recipient_service.cpp
index f52e3b8a3a7..5b66d19e8bd 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_recipient_service.cpp
@@ -421,6 +421,12 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::_runMand
isCanceled = stepdownToken.isCanceled()](Status dataReplicationHaltStatus) {
_metrics->onStateTransition(toMetricsState(_recipientCtx.getState()), boost::none);
+ // Destroy metrics early so its lifetime will not be tied to the lifetime of this
+ // state machine. Future callbacks copy shared pointers to this state machine,
+ // causing it to live longer than expected and potentially overlap with a newer
+ // instance when stepping up.
+ _metrics.reset();
+
// If the stepdownToken was triggered, it takes priority in order to make sure that
// the promise is set with an error that the coordinator can retry with. If it ran into
// an unrecoverable error, it would have fasserted earlier.
@@ -434,7 +440,6 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::_runMand
// replication errors because resharding is known to have failed already.
stdx::lock_guard<Latch> lk(_mutex);
ensureFulfilledPromise(lk, _completionPromise, outerStatus);
-
return outerStatus;
});
}
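Why the overlap matters: on step-up a fresh instance of the same state machine is constructed for the in-progress operation, and if the old instance's metrics object is still alive inside a stale callback, the two can coexist. The sketch below illustrates the hazard, assuming a hypothetical registry that permits at most one live metrics object per operation (illustrative only; the actual MongoDB registration machinery differs):

    #include <cassert>
    #include <memory>
    #include <set>
    #include <string>

    class MetricsRegistry {
    public:
        void add(const std::string& opId) {
            bool inserted = _active.insert(opId).second;
            assert(inserted && "metrics for this operation already registered");
            (void)inserted;  // silence unused warning in NDEBUG builds
        }
        void remove(const std::string& opId) { _active.erase(opId); }

    private:
        std::set<std::string> _active;
    };

    class Metrics {
    public:
        Metrics(MetricsRegistry& reg, std::string opId)
            : _reg(reg), _opId(std::move(opId)) {
            _reg.add(_opId);
        }
        ~Metrics() { _reg.remove(_opId); }

    private:
        MetricsRegistry& _reg;
        std::string _opId;
    };

    int main() {
        MetricsRegistry registry;
        // Old instance, alive only because a stale future callback still holds
        // a shared_ptr to its state machine:
        auto old = std::make_unique<Metrics>(registry, "resharding-1234");
        old.reset();  // the patch's early _metrics.reset() plays this role
        // Step-up can now safely construct metrics for the same operation:
        Metrics fresh(registry, "resharding-1234");
    }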