SERVER-59108 Resolve race with transaction operation not killed after stepdown

(cherry picked from commit 1b31e6ca3d25a35f31f48547aafe0ec33c8c9bfd)
author: Vesselina Ratcheva <vesselina.ratcheva@10gen.com> 2021-10-19 23:49:48 -0400
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-10-21 00:33:46 +0000
commit: bba630c895a90a59b78d12105424575b2f847b91 (patch)
tree: 949515fef7b1124d7c244fc2b1f687a2f746b4d8
parent: 73ab1b397b1292dd791775e9f2f73fb0b3b4699b (diff)
download: mongo-bba630c895a90a59b78d12105424575b2f847b91.tar.gz
4 files changed, 100 insertions, 0 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 40d48f9170b..f5e15553e19 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -173,6 +173,8 @@ all:
     test_file: jstests/core/mod_special_values.js
   - ticket: SERVER-59613
     test_file: jstests/aggregation/range.js
+  - ticket: SERVER-59108
+    test_file: jstests/replsets/stepdown_race_with_transaction.js
 
 
 suites:
diff --git a/jstests/replsets/stepdown_race_with_transaction.js b/jstests/replsets/stepdown_race_with_transaction.js
new file mode 100644
index 00000000000..f1674f5ea3a
--- /dev/null
+++ b/jstests/replsets/stepdown_race_with_transaction.js
@@ -0,0 +1,78 @@
+/**
+ * Tests that multi-documment transactions no longer race with stepdown over
+ * "setAlwaysInterruptAtStepDownOrUp".
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+
+const rst = new ReplSetTest({nodes: 1});
+rst.startSet();
+rst.initiate();
+
+const dbName = "testdb";
+const collName = "testcoll";
+
+const primary = rst.getPrimary();
+const primaryDB = primary.getDB(dbName);
+const primaryColl = primaryDB.getCollection(collName);
+
+// Insert a document that we will later modify in a transaction.
+assert.commandWorked(primaryColl.insert({a: 1}));
+
+// In the first part of the race, we set the 'setAlwaysInterruptAtStepDownOrUp' too late,
+// after stepDown is already done interrupting operations.
+const txnFpBefore = configureFailPoint(primary, "hangBeforeSettingTxnInterruptFlag");
+
+// The second critical part of the race is when the transaction thread has already passed
+// the regular "not primary" checks by the time stepDown has completed and updated
+// writability. (This is the reason we check writability again in the accompanying patch.)
+const txnFpAfter =
+    configureFailPoint(primary, "hangAfterCheckingWritabilityForMultiDocumentTransactions");
+
+jsTestLog("Start the transaction in a parallel shell");
+const txn = startParallelShell(() => {
+    const session = db.getMongo().startSession();
+    const sessionDB = session.getDatabase("testdb");
+    const sessionColl = sessionDB.getCollection("testcoll");
+
+    session.startTransaction();
+    assert.commandFailedWithCode(sessionColl.insert({b: 2}), ErrorCodes.NotWritablePrimary);
+}, primary.port);
+
+jsTestLog("Wait on the first transaction fail point");
+txnFpBefore.wait();
+
+const stepdownFP = configureFailPoint(primary, "stepdownHangAfterGrabbingRSTL");
+
+jsTestLog("Issue a stepdown in a parallel shell");
+const stepdown = startParallelShell(() => {
+    assert.commandWorked(db.adminCommand({replSetStepDown: 10 * 60, force: true}));
+}, primary.port);
+
+jsTestLog("Wait on the stepdown fail point");
+stepdownFP.wait();
+
+// The txn will be forced to wait for stepdown to finish.
+jsTestLog("Release the first transaction fail point and wait in the second");
+txnFpBefore.off();
+txnFpAfter.wait();
+
+jsTestLog("Let stepdown finish");
+stepdownFP.off();
+stepdown();
+
+jsTestLog("Wait on the second transaction fail point");
+txnFpAfter.wait();
+
+jsTestLog("Let the transaction attempt finish");
+txnFpAfter.off();
+txn();
+
+jsTestLog("Checking that the transaction never succeeded");
+assert.eq(1, primaryColl.find().toArray().length);
+
+rst.stopSet();
+})();
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index c2a46a1954b..0a63080f3c1 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -130,6 +130,8 @@ MONGO_FAIL_POINT_DEFINE(omitConfigQuorumCheck);
 MONGO_FAIL_POINT_DEFINE(hangBeforeReconfigOnDrainComplete);
 // Will cause signal drain complete to hang after reconfig.
 MONGO_FAIL_POINT_DEFINE(hangAfterReconfigOnDrainComplete);
+// Hang after grabbing the RSTL but before we start rejecting writes.
+MONGO_FAIL_POINT_DEFINE(stepdownHangAfterGrabbingRSTL);
 
 // Number of times we tried to go live as a secondary.
 Counter64 attemptsToBecomeSecondary;
@@ -2539,6 +2541,8 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
     AutoGetRstlForStepUpStepDown arsd(
         this, opCtx, ReplicationCoordinator::OpsKillingStateTransitionEnum::kStepDown, deadline);
 
+    stepdownHangAfterGrabbingRSTL.pauseWhileSet();
+
     stdx::unique_lock<Latch> lk(_mutex);
 
     opCtx->checkForInterrupt();
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index 9eb7d3137a3..c9355a84d58 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -44,6 +44,7 @@
 #include "mongo/db/command_can_run_here.h"
 #include "mongo/db/commands.h"
 #include "mongo/db/commands/txn_cmds_gen.h"
+#include "mongo/db/concurrency/replication_state_transition_lock_guard.h"
 #include "mongo/db/curop.h"
 #include "mongo/db/curop_failpoint_helpers.h"
 #include "mongo/db/curop_metrics.h"
@@ -114,6 +115,8 @@ MONGO_FAIL_POINT_DEFINE(waitAfterNewStatementBlocksBehindPrepare);
 MONGO_FAIL_POINT_DEFINE(waitAfterCommandFinishesExecution);
 MONGO_FAIL_POINT_DEFINE(failWithErrorCodeInRunCommand);
 MONGO_FAIL_POINT_DEFINE(hangBeforeSessionCheckOut);
+MONGO_FAIL_POINT_DEFINE(hangBeforeSettingTxnInterruptFlag);
+MONGO_FAIL_POINT_DEFINE(hangAfterCheckingWritabilityForMultiDocumentTransactions);
 
 // Tracks the number of times a legacy unacknowledged write failed due to
 // not primary error resulted in network disconnection.
@@ -1010,6 +1013,7 @@ void execCommandDatabase(OperationContext* opCtx,
             // Kill this operation on step down even if it hasn't taken write locks yet, because it
             // could conflict with transactions from a new primary.
             if (inMultiDocumentTransaction) {
+                hangBeforeSettingTxnInterruptFlag.pauseWhileSet();
                 opCtx->setAlwaysInterruptAtStepDownOrUp();
             }
 
@@ -1051,6 +1055,18 @@ void execCommandDatabase(OperationContext* opCtx,
                         "node is in drain mode",
                         optedIn || alwaysAllowed);
             }
+
+            // We acquire the RSTL which helps us here in two ways:
+            // 1) It forces us to wait out any outstanding stepdown attempts.
+            // 2) It guarantees that future RSTL holders will see the
+            // 'setAlwaysInterruptAtStepDownOrUp' flag we set above.
+            if (inMultiDocumentTransaction) {
+                hangAfterCheckingWritabilityForMultiDocumentTransactions.pauseWhileSet();
+                repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX);
+                uassert(ErrorCodes::NotWritablePrimary,
+                        "Cannot start a transaction in a non-primary state",
+                        replCoord->canAcceptWritesForDatabase(opCtx, dbname));
+            }
         }
 
         if (command->adminOnly()) {
author	Vesselina Ratcheva <vesselina.ratcheva@10gen.com>	2021-10-19 23:49:48 -0400
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2021-10-21 00:33:46 +0000
commit	bba630c895a90a59b78d12105424575b2f847b91 (patch)
tree	949515fef7b1124d7c244fc2b1f687a2f746b4d8
parent	73ab1b397b1292dd791775e9f2f73fb0b3b4699b (diff)
download	mongo-bba630c895a90a59b78d12105424575b2f847b91.tar.gz