diff options
Diffstat (limited to 'src/mongo/db/s/resharding/resharding_recipient_service_test.cpp')
-rw-r--r-- | src/mongo/db/s/resharding/resharding_recipient_service_test.cpp | 50 |
1 files changed, 49 insertions, 1 deletions
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp b/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp index 5d69b790321..b93d43107f7 100644 --- a/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp +++ b/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp @@ -432,6 +432,49 @@ TEST_F(ReshardingRecipientServiceTest, StepDownStepUpEachTransition) { } } +TEST_F(ReshardingRecipientServiceTest, OpCtxKilledWhileRestoringMetrics) { + for (bool isAlsoDonor : {false, true}) { + LOGV2(5992701, + "Running case", + "test"_attr = _agent.getTestName(), + "isAlsoDonor"_attr = isAlsoDonor); + + // Initialize recipient. + auto doc = makeStateDocument(isAlsoDonor); + auto instanceId = + BSON(ReshardingRecipientDocument::kReshardingUUIDFieldName << doc.getReshardingUUID()); + auto opCtx = makeOperationContext(); + if (isAlsoDonor) { + createSourceCollection(opCtx.get(), doc); + } + RecipientStateMachine::insertStateDocument(opCtx.get(), doc); + auto recipient = RecipientStateMachine::getOrCreate(opCtx.get(), _service, doc.toBSON()); + + // In order to restore metrics, metrics need to exist in the first place, so put the + // recipient in the cloning state, then step down. + PauseDuringStateTransitions stateTransitionsGuard{controller(), + RecipientStateEnum::kCloning}; + notifyToStartCloning(opCtx.get(), *recipient, doc); + stateTransitionsGuard.wait(RecipientStateEnum::kCloning); + stepDown(); + stateTransitionsGuard.unset(RecipientStateEnum::kCloning); + recipient.reset(); + + // Enable failpoint and step up. + auto fp = globalFailPointRegistry().find("reshardingOpCtxKilledWhileRestoringMetrics"); + fp->setMode(FailPoint::nTimes, 1); + stepUp(opCtx.get()); + + // After the failpoint is disabled, the operation should succeed. + auto maybeRecipient = RecipientStateMachine::lookup(opCtx.get(), _service, instanceId); + ASSERT_TRUE(bool(maybeRecipient)); + recipient = *maybeRecipient; + notifyReshardingCommitting(opCtx.get(), *recipient, doc); + ASSERT_OK(recipient->getCompletionFuture().getNoThrow()); + checkStateDocumentRemoved(opCtx.get()); + } +} + DEATH_TEST_REGEX_F(ReshardingRecipientServiceTest, CommitFn, "4457001.*tripwire") { auto doc = makeStateDocument(false /* isAlsoDonor */); auto opCtx = makeOperationContext(); @@ -767,7 +810,6 @@ TEST_F(ReshardingRecipientServiceTest, RestoreMetricsAfterStepUp) { default: break; } - // Step down before the transition to state can complete. stateTransitionsGuard.wait(state); if (state == RecipientStateEnum::kStrictConsistency) { @@ -801,6 +843,12 @@ TEST_F(ReshardingRecipientServiceTest, RestoreMetricsAfterStepUp) { ErrorCodes::InterruptedDueToReplStateChange); prevState = state; + if (state == RecipientStateEnum::kApplying || + state == RecipientStateEnum::kStrictConsistency) { + // If metrics are being verified in the next pass, ensure a retry does not alter values. + auto fp = globalFailPointRegistry().find("reshardingOpCtxKilledWhileRestoringMetrics"); + fp->setMode(FailPoint::nTimes, 1); + } recipient.reset(); if (state != RecipientStateEnum::kDone) |