diff options
author | Pavi Vetriselvan <pvselvan@umich.edu> | 2019-05-23 14:30:45 -0400 |
---|---|---|
committer | Pavi Vetriselvan <pvselvan@umich.edu> | 2019-05-23 14:32:25 -0400 |
commit | 9b8814b6e7130652eca6f8fdaa9ff215b07d2ebb (patch) | |
tree | ef6b1f1b6e75a1ee830c42edf579b9711703c2c2 | |
parent | d08633878f0cc33b5484decdb8df17845c842089 (diff) | |
download | mongo-9b8814b6e7130652eca6f8fdaa9ff215b07d2ebb.tar.gz |
SERVER-41037 kill all user operations that hit prepare conflicts on step up
-rw-r--r-- | jstests/replsets/kills_reads_with_prepare_conflicts_during_stepup.js | 128 | ||||
-rw-r--r-- | src/mongo/db/prepare_conflict_tracker.h | 7 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 6 |
3 files changed, 135 insertions, 6 deletions
diff --git a/jstests/replsets/kills_reads_with_prepare_conflicts_during_stepup.js b/jstests/replsets/kills_reads_with_prepare_conflicts_during_stepup.js new file mode 100644 index 00000000000..3ad0bd22248 --- /dev/null +++ b/jstests/replsets/kills_reads_with_prepare_conflicts_during_stepup.js @@ -0,0 +1,128 @@ +/* + * Tests that a read operation on a secondary that encounters a prepare conflict gets killed + * when we cause the secondary to step up. + * + * @tags: [uses_transactions, uses_prepare_transaction] + */ + +(function() { + "use strict"; + + load("jstests/core/txns/libs/prepare_helpers.js"); + load("jstests/libs/check_log.js"); + + var rst = new ReplSetTest({nodes: 2}); + rst.startSet(); + + const config = rst.getReplSetConfig(); + // Increase the election timeout so that we do not accidentally trigger an election before + // we make the secondary step up. + config.settings = {"electionTimeoutMillis": 12 * 60 * 60 * 1000}; + rst.initiate(config); + + let primary = rst.getPrimary(); + let secondary = rst.getSecondary(); + + const dbName = "test"; + const collName = "kill_reads_with_prepare_conflicts_during_step_up"; + + const primaryDB = primary.getDB(dbName); + const primaryColl = primaryDB[collName]; + + let session = primary.startSession(); + const sessionID = session.getSessionId(); + let sessionDB = session.getDatabase(dbName); + const sessionColl = sessionDB.getCollection(collName); + + assert.commandWorked(secondary.adminCommand( + {configureFailPoint: "WTPrintPrepareConflictLog", mode: "alwaysOn"})); + + // Insert a document that we will later modify in a transaction. + assert.commandWorked(primaryColl.insert({_id: 1})); + + jsTestLog("Start a transaction and prepare it"); + session.startTransaction(); + assert.commandWorked(sessionColl.update({_id: 1}, {_id: 1, a: 1})); + const prepareTimestamp = PrepareHelpers.prepareTransaction(session); + + // Advance the clusterTime with another insert. + const clusterTimeAfterPrepare = + assert + .commandWorked(primaryColl.runCommand( + "insert", {documents: [{advanceClusterTime: 1}], writeConcern: {w: "majority"}})) + .operationTime; + + // Ensure that the secondary replicates the prepare and the additional insert. + rst.awaitReplication(); + + // Make sure a secondary read using afterClusterTime times out when trying to + // read a prepared document. + const secondaryDB = secondary.getDB(dbName); + assert.commandFailedWithCode(secondaryDB.runCommand({ + find: collName, + filter: {_id: 1}, + readConcern: {afterClusterTime: clusterTimeAfterPrepare}, + maxTimeMS: 2 * 1000 // 2 seconds + }), + ErrorCodes.MaxTimeMSExpired); + + // Clear secondary log so that when we wait for the WTPrintPrepareConflictLog fail point, we + // do not count the previous find. + assert.commandWorked(secondaryDB.adminCommand({clearLog: "global"})); + + TestData.dbName = dbName; + TestData.collName = collName; + TestData.clusterTime = clusterTimeAfterPrepare; + + const waitForSecondaryReadBlockedOnPrepareConflictThread = startParallelShell(() => { + // Allow for secondary reads. + db.getMongo().setSlaveOk(); + const parallelTestDB = db.getSiblingDB(TestData.dbName); + const parallelTestCollName = TestData.collName; + + // The following read should block on the prepared transaction since it will be + // reading a conflicting document using an afterClusterTime later than the + // prepareTimestamp. + assert.commandFailedWithCode(parallelTestDB.runCommand({ + find: parallelTestCollName, + filter: {_id: 1}, + readConcern: {afterClusterTime: TestData.clusterTime} + }), + ErrorCodes.InterruptedDueToReplStateChange); + }, secondary.port); + + jsTestLog("Waiting for failpoint"); + checkLog.contains(secondary, "WTPrintPrepareConflictLog fail point enabled"); + + // Once we've confirmed that the find command has hit a prepare conflict on the secondary, cause + // that secondary to step up. + jsTestLog("Stepping up secondary"); + rst.stepUp(secondary); + + waitForSecondaryReadBlockedOnPrepareConflictThread(); + + rst.waitForState(secondary, ReplSetTest.State.PRIMARY); + rst.waitForState(primary, ReplSetTest.State.SECONDARY); + + primary = rst.getPrimary(); + + // Make sure we can successfully commit the prepared transaction. + jsTestLog("Restoring shell session state"); + session = PrepareHelpers.createSessionWithGivenId(primary, sessionID); + sessionDB = session.getDatabase(dbName); + // The transaction on this session should have a txnNumber of 0. We explicitly set this + // since createSessionWithGivenId does not restore the current txnNumber in the shell. + session.setTxnNumber_forTesting(0); + const txnNumber = session.getTxnNumber_forTesting(); + + jsTestLog("Committing transaction"); + // Commit the transaction. + assert.commandWorked(sessionDB.adminCommand({ + commitTransaction: 1, + commitTimestamp: prepareTimestamp, + txnNumber: NumberLong(txnNumber), + autocommit: false, + })); + + rst.stopSet(); +})();
\ No newline at end of file diff --git a/src/mongo/db/prepare_conflict_tracker.h b/src/mongo/db/prepare_conflict_tracker.h index e9deec22022..48e4fa1a063 100644 --- a/src/mongo/db/prepare_conflict_tracker.h +++ b/src/mongo/db/prepare_conflict_tracker.h @@ -36,11 +36,8 @@ namespace mongo { /** * The PrepareConflictTracker tracks if a read operation encounters a prepare conflict. If it - * is blocked on a prepare conflict, we will kill the operation during step down. This will - * help us avoid deadlocks between prepare conflicts and state transitions. - * - * TODO SERVER-41037: Modify above comment to include step up or use "state transitions" to - * encompass both. + * is blocked on a prepare conflict, we will kill the operation during state transitions (step + * up/step down). This will help us avoid deadlocks between prepare conflicts and state transitions. */ class PrepareConflictTracker { public: diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index dceb978a6f2..87bed913f8a 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -1025,7 +1025,11 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx, _externalState->onDrainComplete(opCtx); - ReplicationStateTransitionLockGuard transitionGuard(opCtx, MODE_X); + // Kill all user writes and user reads that encounter a prepare conflict. Also kills select + // internal operations. Although secondaries cannot accept writes, a step up can kill writes + // that were blocked behind the RSTL lock held by a step down attempt. These writes will be + // killed with a retryable error code during step up. + AutoGetRstlForStepUpStepDown arsu(this, opCtx); lk.lock(); // Exit drain mode only if we're actually in draining mode, the apply buffer is empty in the |