summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVesselina Ratcheva <vesselina.ratcheva@10gen.com>2021-10-19 23:49:48 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-10-21 00:33:46 +0000
commitbba630c895a90a59b78d12105424575b2f847b91 (patch)
tree949515fef7b1124d7c244fc2b1f687a2f746b4d8
parent73ab1b397b1292dd791775e9f2f73fb0b3b4699b (diff)
downloadmongo-bba630c895a90a59b78d12105424575b2f847b91.tar.gz
SERVER-59108 Resolve race with transaction operation not killed after stepdown
(cherry picked from commit 1b31e6ca3d25a35f31f48547aafe0ec33c8c9bfd)
-rw-r--r--etc/backports_required_for_multiversion_tests.yml2
-rw-r--r--jstests/replsets/stepdown_race_with_transaction.js78
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp4
-rw-r--r--src/mongo/db/service_entry_point_common.cpp16
4 files changed, 100 insertions, 0 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 40d48f9170b..f5e15553e19 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -173,6 +173,8 @@ all:
test_file: jstests/core/mod_special_values.js
- ticket: SERVER-59613
test_file: jstests/aggregation/range.js
+ - ticket: SERVER-59108
+ test_file: jstests/replsets/stepdown_race_with_transaction.js
suites:
diff --git a/jstests/replsets/stepdown_race_with_transaction.js b/jstests/replsets/stepdown_race_with_transaction.js
new file mode 100644
index 00000000000..f1674f5ea3a
--- /dev/null
+++ b/jstests/replsets/stepdown_race_with_transaction.js
@@ -0,0 +1,78 @@
+/**
+ * Tests that multi-documment transactions no longer race with stepdown over
+ * "setAlwaysInterruptAtStepDownOrUp".
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+
+const rst = new ReplSetTest({nodes: 1});
+rst.startSet();
+rst.initiate();
+
+const dbName = "testdb";
+const collName = "testcoll";
+
+const primary = rst.getPrimary();
+const primaryDB = primary.getDB(dbName);
+const primaryColl = primaryDB.getCollection(collName);
+
+// Insert a document that we will later modify in a transaction.
+assert.commandWorked(primaryColl.insert({a: 1}));
+
+// In the first part of the race, we set the 'setAlwaysInterruptAtStepDownOrUp' too late,
+// after stepDown is already done interrupting operations.
+const txnFpBefore = configureFailPoint(primary, "hangBeforeSettingTxnInterruptFlag");
+
+// The second critical part of the race is when the transaction thread has already passed
+// the regular "not primary" checks by the time stepDown has completed and updated
+// writability. (This is the reason we check writability again in the accompanying patch.)
+const txnFpAfter =
+ configureFailPoint(primary, "hangAfterCheckingWritabilityForMultiDocumentTransactions");
+
+jsTestLog("Start the transaction in a parallel shell");
+const txn = startParallelShell(() => {
+ const session = db.getMongo().startSession();
+ const sessionDB = session.getDatabase("testdb");
+ const sessionColl = sessionDB.getCollection("testcoll");
+
+ session.startTransaction();
+ assert.commandFailedWithCode(sessionColl.insert({b: 2}), ErrorCodes.NotWritablePrimary);
+}, primary.port);
+
+jsTestLog("Wait on the first transaction fail point");
+txnFpBefore.wait();
+
+const stepdownFP = configureFailPoint(primary, "stepdownHangAfterGrabbingRSTL");
+
+jsTestLog("Issue a stepdown in a parallel shell");
+const stepdown = startParallelShell(() => {
+ assert.commandWorked(db.adminCommand({replSetStepDown: 10 * 60, force: true}));
+}, primary.port);
+
+jsTestLog("Wait on the stepdown fail point");
+stepdownFP.wait();
+
+// The txn will be forced to wait for stepdown to finish.
+jsTestLog("Release the first transaction fail point and wait in the second");
+txnFpBefore.off();
+txnFpAfter.wait();
+
+jsTestLog("Let stepdown finish");
+stepdownFP.off();
+stepdown();
+
+jsTestLog("Wait on the second transaction fail point");
+txnFpAfter.wait();
+
+jsTestLog("Let the transaction attempt finish");
+txnFpAfter.off();
+txn();
+
+jsTestLog("Checking that the transaction never succeeded");
+assert.eq(1, primaryColl.find().toArray().length);
+
+rst.stopSet();
+})();
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index c2a46a1954b..0a63080f3c1 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -130,6 +130,8 @@ MONGO_FAIL_POINT_DEFINE(omitConfigQuorumCheck);
MONGO_FAIL_POINT_DEFINE(hangBeforeReconfigOnDrainComplete);
// Will cause signal drain complete to hang after reconfig.
MONGO_FAIL_POINT_DEFINE(hangAfterReconfigOnDrainComplete);
+// Hang after grabbing the RSTL but before we start rejecting writes.
+MONGO_FAIL_POINT_DEFINE(stepdownHangAfterGrabbingRSTL);
// Number of times we tried to go live as a secondary.
Counter64 attemptsToBecomeSecondary;
@@ -2539,6 +2541,8 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
AutoGetRstlForStepUpStepDown arsd(
this, opCtx, ReplicationCoordinator::OpsKillingStateTransitionEnum::kStepDown, deadline);
+ stepdownHangAfterGrabbingRSTL.pauseWhileSet();
+
stdx::unique_lock<Latch> lk(_mutex);
opCtx->checkForInterrupt();
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index 9eb7d3137a3..c9355a84d58 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -44,6 +44,7 @@
#include "mongo/db/command_can_run_here.h"
#include "mongo/db/commands.h"
#include "mongo/db/commands/txn_cmds_gen.h"
+#include "mongo/db/concurrency/replication_state_transition_lock_guard.h"
#include "mongo/db/curop.h"
#include "mongo/db/curop_failpoint_helpers.h"
#include "mongo/db/curop_metrics.h"
@@ -114,6 +115,8 @@ MONGO_FAIL_POINT_DEFINE(waitAfterNewStatementBlocksBehindPrepare);
MONGO_FAIL_POINT_DEFINE(waitAfterCommandFinishesExecution);
MONGO_FAIL_POINT_DEFINE(failWithErrorCodeInRunCommand);
MONGO_FAIL_POINT_DEFINE(hangBeforeSessionCheckOut);
+MONGO_FAIL_POINT_DEFINE(hangBeforeSettingTxnInterruptFlag);
+MONGO_FAIL_POINT_DEFINE(hangAfterCheckingWritabilityForMultiDocumentTransactions);
// Tracks the number of times a legacy unacknowledged write failed due to
// not primary error resulted in network disconnection.
@@ -1010,6 +1013,7 @@ void execCommandDatabase(OperationContext* opCtx,
// Kill this operation on step down even if it hasn't taken write locks yet, because it
// could conflict with transactions from a new primary.
if (inMultiDocumentTransaction) {
+ hangBeforeSettingTxnInterruptFlag.pauseWhileSet();
opCtx->setAlwaysInterruptAtStepDownOrUp();
}
@@ -1051,6 +1055,18 @@ void execCommandDatabase(OperationContext* opCtx,
"node is in drain mode",
optedIn || alwaysAllowed);
}
+
+ // We acquire the RSTL which helps us here in two ways:
+ // 1) It forces us to wait out any outstanding stepdown attempts.
+ // 2) It guarantees that future RSTL holders will see the
+ // 'setAlwaysInterruptAtStepDownOrUp' flag we set above.
+ if (inMultiDocumentTransaction) {
+ hangAfterCheckingWritabilityForMultiDocumentTransactions.pauseWhileSet();
+ repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX);
+ uassert(ErrorCodes::NotWritablePrimary,
+ "Cannot start a transaction in a non-primary state",
+ replCoord->canAcceptWritesForDatabase(opCtx, dbname));
+ }
}
if (command->adminOnly()) {