1 files changed, 66 insertions, 0 deletions
diff --git a/jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js b/jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js
new file mode 100644
index 00000000000..c0bd7fbb364
--- /dev/null
+++ b/jstests/sharding/move_primary_donor_cleaned_up_if_coordinator_steps_up_aborted.js
@@ -0,0 +1,66 @@
+/**
+ * Tests that the MovePrimaryCoordinator still cleans up the MovePrimaryDonor when it is recovered
+ * after a failover in an already-aborted state.
+ *
+ *  @tags: [
+ *    requires_fcv_70,
+ *    featureFlagOnlineMovePrimaryLifecycle
+ * ]
+ */
+(function() {
+'use strict';
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/parallel_shell_helpers.js");
+
+const st = new ShardingTest({mongos: 1, shards: 2, rs: {nodes: 3}});
+
+const mongos = st.s0;
+const shard0 = st.shard0;
+const oldDonorPrimary = st.rs0.getPrimary();
+const shard1 = st.shard1;
+
+const dbName = 'test_db';
+const collName = 'test_coll';
+const collNS = dbName + '.' + collName;
+
+assert.commandWorked(mongos.adminCommand({enableSharding: dbName, primaryShard: shard0.shardName}));
+assert.commandWorked(mongos.getCollection(collNS).insert({value: 1}));
+assert.commandWorked(mongos.getCollection(collNS).insert({value: 2}));
+
+const donorStartedCloningFp = configureFailPoint(oldDonorPrimary,
+                                                 "pauseDuringMovePrimaryDonorStateTransition",
+                                                 {progress: "after", state: "cloning"});
+
+// Run movePrimary and wait for MovePrimaryDonor to start.
+const joinMovePrimary = startParallelShell(
+    funWithArgs(function(dbName, toShard) {
+        assert.commandFailed(db.adminCommand({movePrimary: dbName, to: toShard}));
+    }, dbName, shard1.shardName), mongos.port);
+
+donorStartedCloningFp.wait();
+
+// Trigger a failover. The MovePrimaryCoordinator will abort on step up. Make sure it does not clean
+// up the donor yet.
+const pauseCoordinatorFps = new Map();
+st.rs0.nodes.map(node => pauseCoordinatorFps.put(
+                     node, configureFailPoint(node, "movePrimaryCoordinatorHangBeforeCleaningUp")));
+st.rs0.getPrimary().adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: 1});
+donorStartedCloningFp.off();
+st.rs0.awaitNodesAgreeOnPrimary();
+
+// TODO SERVER-77115: Investigate why test times out if this sleep is removed.
+sleep(5000);
+
+// Trigger another failover when 1. the MovePrimaryCoordinator is already aborted and 2. the
+// MovePrimaryDonor is still alive. This is the case this test is trying to set up.
+pauseCoordinatorFps.get(st.rs0.getPrimary()).wait();
+st.rs0.getPrimary().adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: 1});
+st.rs0.awaitNodesAgreeOnPrimary();
+pauseCoordinatorFps.values().map(fp => fp.off());
+joinMovePrimary();
+
+// Verify that the MovePrimaryCoordinator has cleaned up the MovePrimaryDonor.
+assert.eq([], shard0.getDB("config").movePrimaryDonors.find({}).toArray());
+
+st.stop();
+})();