diff options
author | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2021-10-19 23:49:48 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-10-21 00:33:46 +0000 |
commit | bba630c895a90a59b78d12105424575b2f847b91 (patch) | |
tree | 949515fef7b1124d7c244fc2b1f687a2f746b4d8 | |
parent | 73ab1b397b1292dd791775e9f2f73fb0b3b4699b (diff) | |
download | mongo-bba630c895a90a59b78d12105424575b2f847b91.tar.gz |
SERVER-59108 Resolve race with transaction operation not killed after stepdown
(cherry picked from commit 1b31e6ca3d25a35f31f48547aafe0ec33c8c9bfd)
-rw-r--r-- | etc/backports_required_for_multiversion_tests.yml | 2 | ||||
-rw-r--r-- | jstests/replsets/stepdown_race_with_transaction.js | 78 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/service_entry_point_common.cpp | 16 |
4 files changed, 100 insertions, 0 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 40d48f9170b..f5e15553e19 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -173,6 +173,8 @@ all: test_file: jstests/core/mod_special_values.js - ticket: SERVER-59613 test_file: jstests/aggregation/range.js + - ticket: SERVER-59108 + test_file: jstests/replsets/stepdown_race_with_transaction.js suites: diff --git a/jstests/replsets/stepdown_race_with_transaction.js b/jstests/replsets/stepdown_race_with_transaction.js new file mode 100644 index 00000000000..f1674f5ea3a --- /dev/null +++ b/jstests/replsets/stepdown_race_with_transaction.js @@ -0,0 +1,78 @@ +/** + * Tests that multi-documment transactions no longer race with stepdown over + * "setAlwaysInterruptAtStepDownOrUp". + */ + +(function() { +"use strict"; + +load("jstests/libs/fail_point_util.js"); + +const rst = new ReplSetTest({nodes: 1}); +rst.startSet(); +rst.initiate(); + +const dbName = "testdb"; +const collName = "testcoll"; + +const primary = rst.getPrimary(); +const primaryDB = primary.getDB(dbName); +const primaryColl = primaryDB.getCollection(collName); + +// Insert a document that we will later modify in a transaction. +assert.commandWorked(primaryColl.insert({a: 1})); + +// In the first part of the race, we set the 'setAlwaysInterruptAtStepDownOrUp' too late, +// after stepDown is already done interrupting operations. +const txnFpBefore = configureFailPoint(primary, "hangBeforeSettingTxnInterruptFlag"); + +// The second critical part of the race is when the transaction thread has already passed +// the regular "not primary" checks by the time stepDown has completed and updated +// writability. (This is the reason we check writability again in the accompanying patch.) +const txnFpAfter = + configureFailPoint(primary, "hangAfterCheckingWritabilityForMultiDocumentTransactions"); + +jsTestLog("Start the transaction in a parallel shell"); +const txn = startParallelShell(() => { + const session = db.getMongo().startSession(); + const sessionDB = session.getDatabase("testdb"); + const sessionColl = sessionDB.getCollection("testcoll"); + + session.startTransaction(); + assert.commandFailedWithCode(sessionColl.insert({b: 2}), ErrorCodes.NotWritablePrimary); +}, primary.port); + +jsTestLog("Wait on the first transaction fail point"); +txnFpBefore.wait(); + +const stepdownFP = configureFailPoint(primary, "stepdownHangAfterGrabbingRSTL"); + +jsTestLog("Issue a stepdown in a parallel shell"); +const stepdown = startParallelShell(() => { + assert.commandWorked(db.adminCommand({replSetStepDown: 10 * 60, force: true})); +}, primary.port); + +jsTestLog("Wait on the stepdown fail point"); +stepdownFP.wait(); + +// The txn will be forced to wait for stepdown to finish. +jsTestLog("Release the first transaction fail point and wait in the second"); +txnFpBefore.off(); +txnFpAfter.wait(); + +jsTestLog("Let stepdown finish"); +stepdownFP.off(); +stepdown(); + +jsTestLog("Wait on the second transaction fail point"); +txnFpAfter.wait(); + +jsTestLog("Let the transaction attempt finish"); +txnFpAfter.off(); +txn(); + +jsTestLog("Checking that the transaction never succeeded"); +assert.eq(1, primaryColl.find().toArray().length); + +rst.stopSet(); +})(); diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index c2a46a1954b..0a63080f3c1 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -130,6 +130,8 @@ MONGO_FAIL_POINT_DEFINE(omitConfigQuorumCheck); MONGO_FAIL_POINT_DEFINE(hangBeforeReconfigOnDrainComplete); // Will cause signal drain complete to hang after reconfig. MONGO_FAIL_POINT_DEFINE(hangAfterReconfigOnDrainComplete); +// Hang after grabbing the RSTL but before we start rejecting writes. +MONGO_FAIL_POINT_DEFINE(stepdownHangAfterGrabbingRSTL); // Number of times we tried to go live as a secondary. Counter64 attemptsToBecomeSecondary; @@ -2539,6 +2541,8 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx, AutoGetRstlForStepUpStepDown arsd( this, opCtx, ReplicationCoordinator::OpsKillingStateTransitionEnum::kStepDown, deadline); + stepdownHangAfterGrabbingRSTL.pauseWhileSet(); + stdx::unique_lock<Latch> lk(_mutex); opCtx->checkForInterrupt(); diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp index 9eb7d3137a3..c9355a84d58 100644 --- a/src/mongo/db/service_entry_point_common.cpp +++ b/src/mongo/db/service_entry_point_common.cpp @@ -44,6 +44,7 @@ #include "mongo/db/command_can_run_here.h" #include "mongo/db/commands.h" #include "mongo/db/commands/txn_cmds_gen.h" +#include "mongo/db/concurrency/replication_state_transition_lock_guard.h" #include "mongo/db/curop.h" #include "mongo/db/curop_failpoint_helpers.h" #include "mongo/db/curop_metrics.h" @@ -114,6 +115,8 @@ MONGO_FAIL_POINT_DEFINE(waitAfterNewStatementBlocksBehindPrepare); MONGO_FAIL_POINT_DEFINE(waitAfterCommandFinishesExecution); MONGO_FAIL_POINT_DEFINE(failWithErrorCodeInRunCommand); MONGO_FAIL_POINT_DEFINE(hangBeforeSessionCheckOut); +MONGO_FAIL_POINT_DEFINE(hangBeforeSettingTxnInterruptFlag); +MONGO_FAIL_POINT_DEFINE(hangAfterCheckingWritabilityForMultiDocumentTransactions); // Tracks the number of times a legacy unacknowledged write failed due to // not primary error resulted in network disconnection. @@ -1010,6 +1013,7 @@ void execCommandDatabase(OperationContext* opCtx, // Kill this operation on step down even if it hasn't taken write locks yet, because it // could conflict with transactions from a new primary. if (inMultiDocumentTransaction) { + hangBeforeSettingTxnInterruptFlag.pauseWhileSet(); opCtx->setAlwaysInterruptAtStepDownOrUp(); } @@ -1051,6 +1055,18 @@ void execCommandDatabase(OperationContext* opCtx, "node is in drain mode", optedIn || alwaysAllowed); } + + // We acquire the RSTL which helps us here in two ways: + // 1) It forces us to wait out any outstanding stepdown attempts. + // 2) It guarantees that future RSTL holders will see the + // 'setAlwaysInterruptAtStepDownOrUp' flag we set above. + if (inMultiDocumentTransaction) { + hangAfterCheckingWritabilityForMultiDocumentTransactions.pauseWhileSet(); + repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX); + uassert(ErrorCodes::NotWritablePrimary, + "Cannot start a transaction in a non-primary state", + replCoord->canAcceptWritesForDatabase(opCtx, dbname)); + } } if (command->adminOnly()) { |