diff options
author | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2021-10-21 00:34:05 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-10-21 00:56:34 +0000 |
commit | 5bb885c0fd06484ca4b3a596b1959d64e3080e8b (patch) | |
tree | b72ec3d4ca4bfd28d19233b3110e6b8f79e0e3c0 | |
parent | ab0980f5e5c20e11ebd2de0a40e515bf59defaef (diff) | |
download | mongo-5bb885c0fd06484ca4b3a596b1959d64e3080e8b.tar.gz |
SERVER-59108 Resolve race with transaction operation not killed after stepdown
(cherry picked from commit 1b31e6ca3d25a35f31f48547aafe0ec33c8c9bfd)
-rw-r--r-- | jstests/replsets/stepdown_race_with_transaction.js | 78 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/service_entry_point_common.cpp | 20 |
3 files changed, 103 insertions, 0 deletions
diff --git a/jstests/replsets/stepdown_race_with_transaction.js b/jstests/replsets/stepdown_race_with_transaction.js new file mode 100644 index 00000000000..f1674f5ea3a --- /dev/null +++ b/jstests/replsets/stepdown_race_with_transaction.js @@ -0,0 +1,78 @@ +/** + * Tests that multi-documment transactions no longer race with stepdown over + * "setAlwaysInterruptAtStepDownOrUp". + */ + +(function() { +"use strict"; + +load("jstests/libs/fail_point_util.js"); + +const rst = new ReplSetTest({nodes: 1}); +rst.startSet(); +rst.initiate(); + +const dbName = "testdb"; +const collName = "testcoll"; + +const primary = rst.getPrimary(); +const primaryDB = primary.getDB(dbName); +const primaryColl = primaryDB.getCollection(collName); + +// Insert a document that we will later modify in a transaction. +assert.commandWorked(primaryColl.insert({a: 1})); + +// In the first part of the race, we set the 'setAlwaysInterruptAtStepDownOrUp' too late, +// after stepDown is already done interrupting operations. +const txnFpBefore = configureFailPoint(primary, "hangBeforeSettingTxnInterruptFlag"); + +// The second critical part of the race is when the transaction thread has already passed +// the regular "not primary" checks by the time stepDown has completed and updated +// writability. (This is the reason we check writability again in the accompanying patch.) +const txnFpAfter = + configureFailPoint(primary, "hangAfterCheckingWritabilityForMultiDocumentTransactions"); + +jsTestLog("Start the transaction in a parallel shell"); +const txn = startParallelShell(() => { + const session = db.getMongo().startSession(); + const sessionDB = session.getDatabase("testdb"); + const sessionColl = sessionDB.getCollection("testcoll"); + + session.startTransaction(); + assert.commandFailedWithCode(sessionColl.insert({b: 2}), ErrorCodes.NotWritablePrimary); +}, primary.port); + +jsTestLog("Wait on the first transaction fail point"); +txnFpBefore.wait(); + +const stepdownFP = configureFailPoint(primary, "stepdownHangAfterGrabbingRSTL"); + +jsTestLog("Issue a stepdown in a parallel shell"); +const stepdown = startParallelShell(() => { + assert.commandWorked(db.adminCommand({replSetStepDown: 10 * 60, force: true})); +}, primary.port); + +jsTestLog("Wait on the stepdown fail point"); +stepdownFP.wait(); + +// The txn will be forced to wait for stepdown to finish. +jsTestLog("Release the first transaction fail point and wait in the second"); +txnFpBefore.off(); +txnFpAfter.wait(); + +jsTestLog("Let stepdown finish"); +stepdownFP.off(); +stepdown(); + +jsTestLog("Wait on the second transaction fail point"); +txnFpAfter.wait(); + +jsTestLog("Let the transaction attempt finish"); +txnFpAfter.off(); +txn(); + +jsTestLog("Checking that the transaction never succeeded"); +assert.eq(1, primaryColl.find().toArray().length); + +rst.stopSet(); +})(); diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 357d461c64f..29b9cfa4332 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -109,6 +109,8 @@ MONGO_FAIL_POINT_DEFINE(holdStableTimestampAtSpecificTimestamp); MONGO_FAIL_POINT_DEFINE(stepdownHangBeforeRSTLEnqueue); // Fail setMaintenanceMode with ErrorCodes::NotSecondary to simulate a concurrent election. MONGO_FAIL_POINT_DEFINE(setMaintenanceModeFailsWithNotSecondary); +// Hang after grabbing the RSTL but before we start rejecting writes. +MONGO_FAIL_POINT_DEFINE(stepdownHangAfterGrabbingRSTL); // Tracks the last state transition performed in this replca set. std::string lastStateTransition; @@ -2072,6 +2074,9 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx, AutoGetRstlForStepUpStepDown arsd( this, opCtx, ReplicationCoordinator::OpsKillingStateTransitionEnum::kStepDown, deadline); + CurOpFailpointHelpers::waitWhileFailPointEnabled( + &stepdownHangAfterGrabbingRSTL, opCtx, "stepdownHangAfterGrabbingRSTL"); + stdx::unique_lock<Latch> lk(_mutex); opCtx->checkForInterrupt(); diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp index 0b01b2533fd..c1acb9cc44c 100644 --- a/src/mongo/db/service_entry_point_common.cpp +++ b/src/mongo/db/service_entry_point_common.cpp @@ -44,6 +44,7 @@ #include "mongo/db/commands.h" #include "mongo/db/commands/test_commands_enabled.h" #include "mongo/db/commands/txn_cmds_gen.h" +#include "mongo/db/concurrency/replication_state_transition_lock_guard.h" #include "mongo/db/curop.h" #include "mongo/db/curop_failpoint_helpers.h" #include "mongo/db/curop_metrics.h" @@ -105,6 +106,8 @@ MONGO_FAIL_POINT_DEFINE(sleepMillisAfterCommandExecutionBegins); MONGO_FAIL_POINT_DEFINE(waitAfterCommandFinishesExecution); MONGO_FAIL_POINT_DEFINE(waitAfterNewStatementBlocksBehindPrepare); MONGO_FAIL_POINT_DEFINE(hangBeforeSessionCheckOut); +MONGO_FAIL_POINT_DEFINE(hangBeforeSettingTxnInterruptFlag); +MONGO_FAIL_POINT_DEFINE(hangAfterCheckingWritabilityForMultiDocumentTransactions); // Tracks the number of times a legacy unacknowledged write failed due to // not primary error resulted in network disconnection. @@ -746,6 +749,8 @@ void execCommandDatabase(OperationContext* opCtx, // Kill this operation on step down even if it hasn't taken write locks yet, because it // could conflict with transactions from a new primary. if (inMultiDocumentTransaction) { + CurOpFailpointHelpers::waitWhileFailPointEnabled( + &hangBeforeSettingTxnInterruptFlag, opCtx, "hangBeforeSettingTxnInterruptFlag"); opCtx->setAlwaysInterruptAtStepDownOrUp(); } @@ -787,6 +792,21 @@ void execCommandDatabase(OperationContext* opCtx, "node is in drain mode", optedIn || alwaysAllowed); } + + // We acquire the RSTL which helps us here in two ways: + // 1) It forces us to wait out any outstanding stepdown attempts. + // 2) It guarantees that future RSTL holders will see the + // 'setAlwaysInterruptAtStepDownOrUp' flag we set above. + if (inMultiDocumentTransaction) { + CurOpFailpointHelpers::waitWhileFailPointEnabled( + &hangAfterCheckingWritabilityForMultiDocumentTransactions, + opCtx, + "hangAfterCheckingWritabilityForMultiDocumentTransactions"); + repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX); + uassert(ErrorCodes::NotWritablePrimary, + "Cannot start a transaction in a non-primary state", + replCoord->canAcceptWritesForDatabase(opCtx, dbname)); + } } if (command->adminOnly()) { |