author     Brett Nawrocki <brett.nawrocki@mongodb.com>       2023-05-05 18:50:17 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2023-05-13 21:56:54 +0000
commit     7ab0d365afc71fbd43b3583fae5424cfb8f7e1a8
tree       a17ba92534ecf1dd60370642651f2fe270a195f0
parent     52ecfb60254c6b0627bb0ae4c69b1cb8c46adf59
download   mongo-7ab0d365afc71fbd43b3583fae5424cfb8f7e1a8.tar.gz
SERVER-76872 Prevent donor from outliving MovePrimaryCoordinator
 etc/backports_required_for_multiversion_tests.yml                                  |  4
 jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js  | 66
 src/mongo/db/s/move_primary/move_primary_donor_service.cpp                         | 15
 src/mongo/db/s/move_primary/move_primary_donor_service_test.cpp                    |  4
 src/mongo/db/s/move_primary_coordinator.cpp                                        | 20
5 files changed, 100 insertions(+), 9 deletions(-)
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 74d614bdfed..8d8cdad86af 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -383,6 +383,8 @@ last-continuous:
ticket: SERVER-74806
- test_file: jstests/sharding/resharding_with_multi_deletes_reduced_ticket_pool_size.js
ticket: SERVER-77097
+ - test_file: jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js
+ ticket: SERVER-76872
suites: null
last-lts:
all:
@@ -850,4 +852,6 @@ last-lts:
ticket: SERVER-74806
- test_file: jstests/sharding/resharding_with_multi_deletes_reduced_ticket_pool_size.js
ticket: SERVER-77097
+ - test_file: jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js
+ ticket: SERVER-76872
suites: null
diff --git a/jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js b/jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js
new file mode 100644
index 00000000000..c0bd7fbb364
--- /dev/null
+++ b/jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js
@@ -0,0 +1,66 @@
+/**
+ * Tests that the MovePrimaryCoordinator recovers and cleans up the MovePrimaryDonor after a
+ * failover when the coordinator has already aborted.
+ *
+ * @tags: [
+ * requires_fcv_70,
+ * featureFlagOnlineMovePrimaryLifecycle
+ * ]
+ */
+(function() {
+'use strict';
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallel_shell_helpers.js");
+
+const st = new ShardingTest({mongos: 1, shards: 2, rs: {nodes: 3}});
+
+const mongos = st.s0;
+const shard0 = st.shard0;
+const oldDonorPrimary = st.rs0.getPrimary();
+const shard1 = st.shard1;
+
+const dbName = 'test_db';
+const collName = 'test_coll';
+const collNS = dbName + '.' + collName;
+
+assert.commandWorked(mongos.adminCommand({enableSharding: dbName, primaryShard: shard0.shardName}));
+assert.commandWorked(mongos.getCollection(collNS).insert({value: 1}));
+assert.commandWorked(mongos.getCollection(collNS).insert({value: 2}));
+
+const donorStartedCloningFp = configureFailPoint(oldDonorPrimary,
+ "pauseDuringMovePrimaryDonorStateTransition",
+ {progress: "after", state: "cloning"});
+
+// Run movePrimary and wait for MovePrimaryDonor to start.
+const joinMovePrimary = startParallelShell(
+ funWithArgs(function(dbName, toShard) {
+ assert.commandFailed(db.adminCommand({movePrimary: dbName, to: toShard}));
+ }, dbName, shard1.shardName), mongos.port);
+
+donorStartedCloningFp.wait();
+
+// Trigger a failover. The MovePrimaryCoordinator will abort on step up. Make sure it does not clean
+// up the donor yet.
+const pauseCoordinatorFps = new Map();
+st.rs0.nodes.forEach(node => pauseCoordinatorFps.set(
+    node, configureFailPoint(node, "movePrimaryCoordinatorHangBeforeCleaningUp")));
+st.rs0.getPrimary().adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: 1});
+donorStartedCloningFp.off();
+st.rs0.awaitNodesAgreeOnPrimary();
+
+// TODO SERVER-77115: Investigate why test times out if this sleep is removed.
+sleep(5000);
+
+// Trigger another failover while (1) the MovePrimaryCoordinator has already aborted and (2) the
+// MovePrimaryDonor is still alive. This is the scenario this test is designed to exercise.
+pauseCoordinatorFps.get(st.rs0.getPrimary()).wait();
+st.rs0.getPrimary().adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: 1});
+st.rs0.awaitNodesAgreeOnPrimary();
+pauseCoordinatorFps.forEach(fp => fp.off());
+joinMovePrimary();
+
+// Verify that the MovePrimaryCoordinator has cleaned up the MovePrimaryDonor.
+assert.eq([], shard0.getDB("config").movePrimaryDonors.find({}).toArray());
+
+st.stop();
+})();
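The key orchestration idiom in the test above: the coordinator failpoint is armed on every node of the donor shard before the forced step-down, because any node may win the subsequent election, and the failpoint belonging to whichever node becomes primary is the one that gets waited on. A minimal sketch of that idiom, assuming an already-running ShardingTest st with a three-node shard as in the test:

    load("jstests/libs/fail_point_util.js");

    // Arm the failpoint on every node: any of them may step up as the new primary.
    const fps = new Map();
    st.rs0.nodes.forEach(
        node => fps.set(node, configureFailPoint(node, "movePrimaryCoordinatorHangBeforeCleaningUp")));

    // Force a failover and wait until the replica set agrees on a new primary.
    st.rs0.getPrimary().adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true});
    st.rs0.awaitNodesAgreeOnPrimary();

    // Only the failpoint on the new primary is reached; wait on it, then release all of them.
    fps.get(st.rs0.getPrimary()).wait();
    fps.forEach(fp => fp.off());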
diff --git a/src/mongo/db/s/move_primary/move_primary_donor_service.cpp b/src/mongo/db/s/move_primary/move_primary_donor_service.cpp
index 8bf7ec7c41b..104fb451593 100644
--- a/src/mongo/db/s/move_primary/move_primary_donor_service.cpp
+++ b/src/mongo/db/s/move_primary/move_primary_donor_service.cpp
@@ -42,8 +42,8 @@ namespace {
// Both of these failpoints have the same implementation. A single failpoint can't be active
// multiple times with different arguments, but setting up more complex scenarios sometimes requires
// multiple failpoints.
-MONGO_FAIL_POINT_DEFINE(pauseDuringMovePrimaryDonorStateEnumTransition);
-MONGO_FAIL_POINT_DEFINE(pauseDuringMovePrimaryDonorStateEnumTransitionAlternate);
+MONGO_FAIL_POINT_DEFINE(pauseDuringMovePrimaryDonorStateTransition);
+MONGO_FAIL_POINT_DEFINE(pauseDuringMovePrimaryDonorStateTransitionAlternate);
MONGO_FAIL_POINT_DEFINE(pauseBeforeBeginningMovePrimaryDonorWorkflow);
MONGO_FAIL_POINT_DEFINE(pauseBeforeMovePrimaryDonorPersistsBlockTimestamp);
@@ -74,7 +74,7 @@ boost::optional<StateTransitionProgress> readProgressArgument(const BSONObj& dat
boost::optional<MovePrimaryDonorStateEnum> readStateArgument(const BSONObj& data) {
try {
auto arg = data.getStringField("state");
- IDLParserContext ectx("pauseDuringMovePrimaryDonorStateEnumTransition::readStateArgument");
+ IDLParserContext ectx("pauseDuringMovePrimaryDonorStateTransition::readStateArgument");
return MovePrimaryDonorState_parse(ectx, arg);
} catch (...) {
return boost::none;
@@ -91,7 +91,7 @@ void evaluatePauseDuringStateTransitionFailpoint(StateTransitionProgress progres
auto desiredState = readStateArgument(data);
if (!desiredProgress.has_value() || !desiredState.has_value()) {
LOGV2(7306200,
- "pauseDuringMovePrimaryDonorStateEnumTransition failpoint data must contain "
+ "pauseDuringMovePrimaryDonorStateTransition failpoint data must contain "
"progress and state arguments",
"failpoint"_attr = failpoint.getName(),
"data"_attr = data);
@@ -103,8 +103,8 @@ void evaluatePauseDuringStateTransitionFailpoint(StateTransitionProgress progres
void evaluatePauseDuringStateTransitionFailpoints(StateTransitionProgress progress,
MovePrimaryDonorStateEnum newState) {
- const auto fps = {std::ref(pauseDuringMovePrimaryDonorStateEnumTransition),
- std::ref(pauseDuringMovePrimaryDonorStateEnumTransitionAlternate)};
+ const auto fps = {std::ref(pauseDuringMovePrimaryDonorStateTransition),
+ std::ref(pauseDuringMovePrimaryDonorStateTransitionAlternate)};
for (auto& fp : fps) {
evaluatePauseDuringStateTransitionFailpoint(progress, newState, fp);
}
@@ -307,7 +307,8 @@ void MovePrimaryDonorExternalState::_runCommandOnRecipient(OperationContext* opC
DatabaseName::kAdmin.toString(),
command,
Shard::RetryPolicy::kNoRetry);
- uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(response));
+ uassertStatusOKWithContext(Shard::CommandResponse::getEffectiveStatus(response),
+ "Received error from remote MovePrimaryRecipient");
}
MovePrimaryDonorExternalStateImpl::MovePrimaryDonorExternalStateImpl(
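As the comment preserved in the hunk above notes, a single failpoint cannot be active twice with different arguments, which is why the donor service defines pauseDuringMovePrimaryDonorStateTransition together with an identically implemented Alternate twin. A hedged sketch of how a jstest could use the pair to pause at two different points of the donor state machine; only the {progress: "after", state: "cloning"} combination appears in this commit, so the "before" progress value, the "blockingWrites" state name, and the donorPrimary connection are illustrative assumptions:

    load("jstests/libs/fail_point_util.js");

    // Pause once the donor has finished transitioning into the "cloning" state.
    const afterCloningFp = configureFailPoint(donorPrimary,
                                              "pauseDuringMovePrimaryDonorStateTransition",
                                              {progress: "after", state: "cloning"});

    // Independently pause around a later transition; the first failpoint cannot be armed a
    // second time with different arguments, hence the alternate failpoint.
    // ("before" and "blockingWrites" are assumed values for illustration.)
    const laterTransitionFp = configureFailPoint(donorPrimary,
                                                 "pauseDuringMovePrimaryDonorStateTransitionAlternate",
                                                 {progress: "before", state: "blockingWrites"});

    afterCloningFp.wait();
    // ... exercise the scenario that needs the donor held in "cloning" ...
    afterCloningFp.off();

    laterTransitionFp.wait();
    // ... exercise the later scenario ...
    laterTransitionFp.off();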
diff --git a/src/mongo/db/s/move_primary/move_primary_donor_service_test.cpp b/src/mongo/db/s/move_primary/move_primary_donor_service_test.cpp
index 56442402256..fe7ea75b7a2 100644
--- a/src/mongo/db/s/move_primary/move_primary_donor_service_test.cpp
+++ b/src/mongo/db/s/move_primary/move_primary_donor_service_test.cpp
@@ -216,13 +216,13 @@ protected:
auto pauseStateTransition(const std::string& progress, MovePrimaryDonorStateEnum state) {
return pauseStateTransitionImpl(
- progress, state, "pauseDuringMovePrimaryDonorStateEnumTransition");
+ progress, state, "pauseDuringMovePrimaryDonorStateTransition");
}
auto pauseStateTransitionAlternate(const std::string& progress,
MovePrimaryDonorStateEnum state) {
return pauseStateTransitionImpl(
- progress, state, "pauseDuringMovePrimaryDonorStateEnumTransitionAlternate");
+ progress, state, "pauseDuringMovePrimaryDonorStateTransitionAlternate");
}
auto failCrudOpsOn(NamespaceString nss, ErrorCodes::Error code) {
diff --git a/src/mongo/db/s/move_primary_coordinator.cpp b/src/mongo/db/s/move_primary_coordinator.cpp
index fbf4cbf3eb9..354e72e52aa 100644
--- a/src/mongo/db/s/move_primary_coordinator.cpp
+++ b/src/mongo/db/s/move_primary_coordinator.cpp
@@ -61,6 +61,7 @@ bool useOnlineCloner() {
} // namespace
MONGO_FAIL_POINT_DEFINE(hangBeforeCloningData);
+MONGO_FAIL_POINT_DEFINE(movePrimaryCoordinatorHangBeforeCleaningUp);
MovePrimaryCoordinator::MovePrimaryCoordinator(ShardingDDLCoordinatorService* service,
const BSONObj& initialState)
@@ -348,8 +349,16 @@ bool MovePrimaryCoordinator::onlineClonerAllowedToBeMissing() const {
}
void MovePrimaryCoordinator::recoverOnlineCloner(OperationContext* opCtx) {
+ if (_onlineCloner) {
+ return;
+ }
_onlineCloner = MovePrimaryDonor::get(opCtx, _dbName, _doc.getToShardId());
if (_onlineCloner) {
+ LOGV2(7687200,
+ "MovePrimaryCoordinator found existing online cloner",
+ "migrationId"_attr = _onlineCloner->getMetadata().getMigrationId(),
+ logAttrs(_dbName),
+ "to"_attr = _doc.getToShardId());
return;
}
invariant(onlineClonerAllowedToBeMissing());
@@ -358,6 +367,11 @@ void MovePrimaryCoordinator::recoverOnlineCloner(OperationContext* opCtx) {
void MovePrimaryCoordinator::createOnlineCloner(OperationContext* opCtx) {
invariant(onlineClonerPossiblyNeverCreated());
_onlineCloner = MovePrimaryDonor::create(opCtx, _dbName, _doc.getToShardId());
+ LOGV2(7687201,
+ "MovePrimaryCoordinator created new online cloner",
+ "migrationId"_attr = _onlineCloner->getMetadata().getMigrationId(),
+ logAttrs(_dbName),
+ "to"_attr = _doc.getToShardId());
}
void MovePrimaryCoordinator::cloneDataUntilReadyForCatchup(OperationContext* opCtx,
@@ -397,6 +411,11 @@ ExecutorFuture<void> MovePrimaryCoordinator::_cleanupOnAbort(
auto* opCtx = opCtxHolder.get();
getForwardableOpMetadata().setOn(opCtx);
+ if (MONGO_unlikely(movePrimaryCoordinatorHangBeforeCleaningUp.shouldFail())) {
+ LOGV2(7687202, "Hit movePrimaryCoordinatorHangBeforeCleaningUp");
+ movePrimaryCoordinatorHangBeforeCleaningUp.pauseWhileSet(opCtx);
+ }
+
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
opCtx, getNewSession(opCtx), **executor);
@@ -464,6 +483,7 @@ void MovePrimaryCoordinator::cleanupOnAbortWithOnlineCloner(OperationContext* op
const CancellationToken& token,
const Status& status) {
unblockReadsAndWrites(opCtx);
+ recoverOnlineCloner(opCtx);
if (!_onlineCloner) {
return;
}