author    | Brett Nawrocki <brett.nawrocki@mongodb.com>      | 2023-05-05 18:50:17 +0000
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-05-13 21:56:54 +0000
commit    | 7ab0d365afc71fbd43b3583fae5424cfb8f7e1a8 (patch)
tree      | a17ba92534ecf1dd60370642651f2fe270a195f0
parent    | 52ecfb60254c6b0627bb0ae4c69b1cb8c46adf59 (diff)
download  | mongo-7ab0d365afc71fbd43b3583fae5424cfb8f7e1a8.tar.gz
SERVER-76872 Prevent donor from outliving MovePrimaryCoordinator
5 files changed, 100 insertions(+), 9 deletions(-)
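The scenario being fixed: a MovePrimaryCoordinator that aborts on step-up could previously be interrupted by a second failover before it cleaned up its MovePrimaryDonor, leaving the donor to outlive the coordinator. The new test below reproduces this with the failpoints touched in this patch. As a rough standalone sketch of how the donor-side failpoint is driven (not part of this patch; it assumes the featureFlagOnlineMovePrimaryLifecycle flag is enabled, as the new test's tags require, and uses the same helpers the test loads):

```javascript
// Hedged sketch: pause a MovePrimaryDonor right after it persists the
// "cloning" state, using the failpoint renamed by this commit. The database
// name "sketch_db" is illustrative only.
load("jstests/libs/fail_point_util.js");
load("jstests/libs/parallel_shell_helpers.js");

const st = new ShardingTest({mongos: 1, shards: 2, rs: {nodes: 3}});
assert.commandWorked(
    st.s.adminCommand({enableSharding: "sketch_db", primaryShard: st.shard0.shardName}));
assert.commandWorked(st.s.getDB("sketch_db").c.insert({x: 1}));

const cloningFp = configureFailPoint(st.rs0.getPrimary(),
                                     "pauseDuringMovePrimaryDonorStateTransition",
                                     {progress: "after", state: "cloning"});

// Drive movePrimary from a parallel shell so this shell stays free to manage failpoints.
const join = startParallelShell(
    funWithArgs(function(toShard) {
        db.adminCommand({movePrimary: "sketch_db", to: toShard});
    }, st.shard1.shardName), st.s.port);

cloningFp.wait();  // The donor is now parked after the "cloning" state transition.
// ... induce failovers or inspect config.movePrimaryDonors here ...
cloningFp.off();
join();
st.stop();
```

The `{progress: "after", state: "cloning"}` payload mirrors the progress and state arguments the failpoint parses in move_primary_donor_service.cpp.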
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 74d614bdfed..8d8cdad86af 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -383,6 +383,8 @@ last-continuous:
     ticket: SERVER-74806
   - test_file: jstests/sharding/resharding_with_multi_deletes_reduced_ticket_pool_size.js
     ticket: SERVER-77097
+  - test_file: jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js
+    ticket: SERVER-76872
   suites: null
 last-lts:
   all:
@@ -850,4 +852,6 @@ last-lts:
     ticket: SERVER-74806
   - test_file: jstests/sharding/resharding_with_multi_deletes_reduced_ticket_pool_size.js
     ticket: SERVER-77097
+  - test_file: jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js
+    ticket: SERVER-76872
   suites: null
diff --git a/jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js b/jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js
new file mode 100644
index 00000000000..c0bd7fbb364
--- /dev/null
+++ b/jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js
@@ -0,0 +1,66 @@
+/**
+ * Test that the movePrimary coordinator recovers and cleans up the donor after a failover when it
+ * is already aborted.
+ *
+ * @tags: [
+ *   requires_fcv_70,
+ *   featureFlagOnlineMovePrimaryLifecycle
+ * ]
+ */
+(function() {
+'use strict';
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallel_shell_helpers.js");
+
+const st = new ShardingTest({mongos: 1, shards: 2, rs: {nodes: 3}});
+
+const mongos = st.s0;
+const shard0 = st.shard0;
+const oldDonorPrimary = st.rs0.getPrimary();
+const shard1 = st.shard1;
+
+const dbName = 'test_db';
+const collName = 'test_coll';
+const collNS = dbName + '.' + collName;
+
+assert.commandWorked(mongos.adminCommand({enableSharding: dbName, primaryShard: shard0.shardName}));
+assert.commandWorked(mongos.getCollection(collNS).insert({value: 1}));
+assert.commandWorked(mongos.getCollection(collNS).insert({value: 2}));
+
+const donorStartedCloningFp = configureFailPoint(oldDonorPrimary,
+                                                 "pauseDuringMovePrimaryDonorStateTransition",
+                                                 {progress: "after", state: "cloning"});
+
+// Run movePrimary and wait for the MovePrimaryDonor to start.
+const joinMovePrimary = startParallelShell(
+    funWithArgs(function(dbName, toShard) {
+        assert.commandFailed(db.adminCommand({movePrimary: dbName, to: toShard}));
+    }, dbName, shard1.shardName), mongos.port);
+
+donorStartedCloningFp.wait();
+
+// Trigger a failover. The MovePrimaryCoordinator will abort on step up. Make sure it does not
+// clean up the donor yet.
+const pauseCoordinatorFps = new Map();
+st.rs0.nodes.forEach(node => pauseCoordinatorFps.set(
+    node, configureFailPoint(node, "movePrimaryCoordinatorHangBeforeCleaningUp")));
+st.rs0.getPrimary().adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: 1});
+donorStartedCloningFp.off();
+st.rs0.awaitNodesAgreeOnPrimary();
+
+// TODO SERVER-77115: Investigate why the test times out if this sleep is removed.
+sleep(5000);
+
+// Trigger another failover when 1. the MovePrimaryCoordinator is already aborted and 2. the
+// MovePrimaryDonor is still alive. This is the case this test is trying to set up.
+pauseCoordinatorFps.get(st.rs0.getPrimary()).wait();
+st.rs0.getPrimary().adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: 1});
+st.rs0.awaitNodesAgreeOnPrimary();
+pauseCoordinatorFps.forEach(fp => fp.off());
+joinMovePrimary();
+
+// Verify that the MovePrimaryCoordinator has cleaned up the MovePrimaryDonor.
+assert.eq([], shard0.getDB("config").movePrimaryDonors.find({}).toArray());
+
+st.stop();
+})();
diff --git a/src/mongo/db/s/move_primary/move_primary_donor_service.cpp b/src/mongo/db/s/move_primary/move_primary_donor_service.cpp
index 8bf7ec7c41b..104fb451593 100644
--- a/src/mongo/db/s/move_primary/move_primary_donor_service.cpp
+++ b/src/mongo/db/s/move_primary/move_primary_donor_service.cpp
@@ -42,8 +42,8 @@ namespace {
 // Both of these failpoints have the same implementation. A single failpoint can't be active
 // multiple times with different arguments, but setting up more complex scenarios sometimes requires
 // multiple failpoints.
-MONGO_FAIL_POINT_DEFINE(pauseDuringMovePrimaryDonorStateEnumTransition);
-MONGO_FAIL_POINT_DEFINE(pauseDuringMovePrimaryDonorStateEnumTransitionAlternate);
+MONGO_FAIL_POINT_DEFINE(pauseDuringMovePrimaryDonorStateTransition);
+MONGO_FAIL_POINT_DEFINE(pauseDuringMovePrimaryDonorStateTransitionAlternate);
 MONGO_FAIL_POINT_DEFINE(pauseBeforeBeginningMovePrimaryDonorWorkflow);
 MONGO_FAIL_POINT_DEFINE(pauseBeforeMovePrimaryDonorPersistsBlockTimestamp);
 
@@ -74,7 +74,7 @@ boost::optional<StateTransitionProgress> readProgressArgument(const BSONObj& dat
 boost::optional<MovePrimaryDonorStateEnum> readStateArgument(const BSONObj& data) {
     try {
         auto arg = data.getStringField("state");
-        IDLParserContext ectx("pauseDuringMovePrimaryDonorStateEnumTransition::readStateArgument");
+        IDLParserContext ectx("pauseDuringMovePrimaryDonorStateTransition::readStateArgument");
         return MovePrimaryDonorState_parse(ectx, arg);
     } catch (...) {
         return boost::none;
@@ -91,7 +91,7 @@ void evaluatePauseDuringStateTransitionFailpoint(StateTransitionProgress progres
     auto desiredState = readStateArgument(data);
     if (!desiredProgress.has_value() || !desiredState.has_value()) {
         LOGV2(7306200,
-              "pauseDuringMovePrimaryDonorStateEnumTransition failpoint data must contain "
+              "pauseDuringMovePrimaryDonorStateTransition failpoint data must contain "
               "progress and state arguments",
               "failpoint"_attr = failpoint.getName(),
               "data"_attr = data);
@@ -103,8 +103,8 @@ void evaluatePauseDuringStateTransitionFailpoint(StateTransitionProgress progres
 
 void evaluatePauseDuringStateTransitionFailpoints(StateTransitionProgress progress,
                                                   MovePrimaryDonorStateEnum newState) {
-    const auto fps = {std::ref(pauseDuringMovePrimaryDonorStateEnumTransition),
-                      std::ref(pauseDuringMovePrimaryDonorStateEnumTransitionAlternate)};
+    const auto fps = {std::ref(pauseDuringMovePrimaryDonorStateTransition),
+                      std::ref(pauseDuringMovePrimaryDonorStateTransitionAlternate)};
     for (auto& fp : fps) {
         evaluatePauseDuringStateTransitionFailpoint(progress, newState, fp);
     }
@@ -307,7 +307,8 @@ void MovePrimaryDonorExternalState::_runCommandOnRecipient(OperationContext* opC
         DatabaseName::kAdmin.toString(),
         command,
         Shard::RetryPolicy::kNoRetry);
-    uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(response));
+    uassertStatusOKWithContext(Shard::CommandResponse::getEffectiveStatus(response),
+                               "Received error from remote MovePrimaryRecipient");
 }
 
 MovePrimaryDonorExternalStateImpl::MovePrimaryDonorExternalStateImpl(
diff --git a/src/mongo/db/s/move_primary/move_primary_donor_service_test.cpp b/src/mongo/db/s/move_primary/move_primary_donor_service_test.cpp
index 56442402256..fe7ea75b7a2 100644
--- a/src/mongo/db/s/move_primary/move_primary_donor_service_test.cpp
+++ b/src/mongo/db/s/move_primary/move_primary_donor_service_test.cpp
@@ -216,13 +216,13 @@ protected:
 
     auto pauseStateTransition(const std::string& progress, MovePrimaryDonorStateEnum state) {
         return pauseStateTransitionImpl(
-            progress, state, "pauseDuringMovePrimaryDonorStateEnumTransition");
+            progress, state, "pauseDuringMovePrimaryDonorStateTransition");
     }
 
     auto pauseStateTransitionAlternate(const std::string& progress,
                                        MovePrimaryDonorStateEnum state) {
         return pauseStateTransitionImpl(
-            progress, state, "pauseDuringMovePrimaryDonorStateEnumTransitionAlternate");
+            progress, state, "pauseDuringMovePrimaryDonorStateTransitionAlternate");
     }
 
     auto failCrudOpsOn(NamespaceString nss, ErrorCodes::Error code) {
diff --git a/src/mongo/db/s/move_primary_coordinator.cpp b/src/mongo/db/s/move_primary_coordinator.cpp
index fbf4cbf3eb9..354e72e52aa 100644
--- a/src/mongo/db/s/move_primary_coordinator.cpp
+++ b/src/mongo/db/s/move_primary_coordinator.cpp
@@ -61,6 +61,7 @@ bool useOnlineCloner() {
 }  // namespace
 
 MONGO_FAIL_POINT_DEFINE(hangBeforeCloningData);
+MONGO_FAIL_POINT_DEFINE(movePrimaryCoordinatorHangBeforeCleaningUp);
 
 MovePrimaryCoordinator::MovePrimaryCoordinator(ShardingDDLCoordinatorService* service,
                                                const BSONObj& initialState)
@@ -348,8 +349,16 @@ bool MovePrimaryCoordinator::onlineClonerAllowedToBeMissing() const {
 }
 
 void MovePrimaryCoordinator::recoverOnlineCloner(OperationContext* opCtx) {
+    if (_onlineCloner) {
+        return;
+    }
     _onlineCloner = MovePrimaryDonor::get(opCtx, _dbName, _doc.getToShardId());
     if (_onlineCloner) {
+        LOGV2(7687200,
+              "MovePrimaryCoordinator found existing online cloner",
+              "migrationId"_attr = _onlineCloner->getMetadata().getMigrationId(),
+              logAttrs(_dbName),
+              "to"_attr = _doc.getToShardId());
         return;
     }
     invariant(onlineClonerAllowedToBeMissing());
@@ -358,6 +367,11 @@ void MovePrimaryCoordinator::recoverOnlineCloner(OperationContext* opCtx) {
 void MovePrimaryCoordinator::createOnlineCloner(OperationContext* opCtx) {
     invariant(onlineClonerPossiblyNeverCreated());
     _onlineCloner = MovePrimaryDonor::create(opCtx, _dbName, _doc.getToShardId());
+    LOGV2(7687201,
+          "MovePrimaryCoordinator created new online cloner",
+          "migrationId"_attr = _onlineCloner->getMetadata().getMigrationId(),
+          logAttrs(_dbName),
+          "to"_attr = _doc.getToShardId());
 }
 
 void MovePrimaryCoordinator::cloneDataUntilReadyForCatchup(OperationContext* opCtx,
@@ -397,6 +411,11 @@ ExecutorFuture<void> MovePrimaryCoordinator::_cleanupOnAbort(
         auto* opCtx = opCtxHolder.get();
         getForwardableOpMetadata().setOn(opCtx);
 
+        if (MONGO_unlikely(movePrimaryCoordinatorHangBeforeCleaningUp.shouldFail())) {
+            LOGV2(7687202, "Hit movePrimaryCoordinatorHangBeforeCleaningUp");
+            movePrimaryCoordinatorHangBeforeCleaningUp.pauseWhileSet(opCtx);
+        }
+
         _performNoopRetryableWriteOnAllShardsAndConfigsvr(
             opCtx, getNewSession(opCtx), **executor);
 
@@ -464,6 +483,7 @@ void MovePrimaryCoordinator::cleanupOnAbortWithOnlineCloner(OperationContext* op
     const CancellationToken& token,
     const Status& status) {
     unblockReadsAndWrites(opCtx);
+    recoverOnlineCloner(opCtx);
    if (!_onlineCloner) {
         return;
     }
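The coordinator-side fix reads directly from the last two hunks: recoverOnlineCloner() now returns early when the cloner handle is already set, which makes it safe to call again from cleanupOnAbortWithOnlineCloner(), where the newly added call reacquires the donor before cleanup even when the coordinator stepped up already aborted. The observable invariant this enforces can be sketched as a hedged mongo-shell check (the `st` fixture and the choice of shard0 as donor are assumptions for illustration; config.movePrimaryDonors is the donor state collection the new test queries):

```javascript
// Hedged sketch of the invariant behind "Prevent donor from outliving
// MovePrimaryCoordinator": once an aborted movePrimary finishes cleanup, no
// MovePrimaryDonor state document may remain on the old primary shard.
assert.soon(() => {
    const leaked = st.shard0.getDB("config").movePrimaryDonors.find({}).toArray();
    jsTest.log("Remaining MovePrimaryDonor documents: " + tojson(leaked));
    return leaked.length === 0;
}, "MovePrimaryDonor state document outlived the MovePrimaryCoordinator");
```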