summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Pavi Vetriselvan <pvselvan@umich.edu> 2019-05-23 14:30:45 -0400
committer Pavi Vetriselvan <pvselvan@umich.edu> 2019-05-23 14:32:25 -0400
commit 9b8814b6e7130652eca6f8fdaa9ff215b07d2ebb (patch)
tree ef6b1f1b6e75a1ee830c42edf579b9711703c2c2
parent d08633878f0cc33b5484decdb8df17845c842089 (diff)
download mongo-9b8814b6e7130652eca6f8fdaa9ff215b07d2ebb.tar.gz
SERVER-41037 kill all user operations that hit prepare conflicts on step up
-rw-r--r-- jstests/replsets/kills_reads_with_prepare_conflicts_during_stepup.js 128
-rw-r--r-- src/mongo/db/prepare_conflict_tracker.h 7
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 6
3 files changed, 135 insertions, 6 deletions
diff --git a/jstests/replsets/kills_reads_with_prepare_conflicts_during_stepup.js b/jstests/replsets/kills_reads_with_prepare_conflicts_during_stepup.js
new file mode 100644
index 00000000000..3ad0bd22248
--- /dev/null
+++ b/jstests/replsets/kills_reads_with_prepare_conflicts_during_stepup.js
@@ -0,0 +1,128 @@
+/*
+ * Tests that a read operation on a secondary that encounters a prepare conflict gets killed
+ * when we cause the secondary to step up.
+ *
+ * @tags: [uses_transactions, uses_prepare_transaction]
+ */
+
+(function() {
+ "use strict";
+
+ load("jstests/core/txns/libs/prepare_helpers.js");
+ load("jstests/libs/check_log.js");
+
+ var rst = new ReplSetTest({nodes: 2});
+ rst.startSet();
+
+ const config = rst.getReplSetConfig();
+ // Increase the election timeout so that we do not accidentally trigger an election before
+ // we make the secondary step up.
+ config.settings = {"electionTimeoutMillis": 12 * 60 * 60 * 1000};
+ rst.initiate(config);
+
+ let primary = rst.getPrimary();
+ let secondary = rst.getSecondary();
+
+ const dbName = "test";
+ const collName = "kill_reads_with_prepare_conflicts_during_step_up";
+
+ const primaryDB = primary.getDB(dbName);
+ const primaryColl = primaryDB[collName];
+
+ let session = primary.startSession();
+ const sessionID = session.getSessionId();
+ let sessionDB = session.getDatabase(dbName);
+ const sessionColl = sessionDB.getCollection(collName);
+
+ assert.commandWorked(secondary.adminCommand(
+ {configureFailPoint: "WTPrintPrepareConflictLog", mode: "alwaysOn"}));
+
+ // Insert a document that we will later modify in a transaction.
+ assert.commandWorked(primaryColl.insert({_id: 1}));
+
+ jsTestLog("Start a transaction and prepare it");
+ session.startTransaction();
+ assert.commandWorked(sessionColl.update({_id: 1}, {_id: 1, a: 1}));
+ const prepareTimestamp = PrepareHelpers.prepareTransaction(session);
+
+ // Advance the clusterTime with another insert.
+ const clusterTimeAfterPrepare =
+ assert
+ .commandWorked(primaryColl.runCommand(
+ "insert", {documents: [{advanceClusterTime: 1}], writeConcern: {w: "majority"}}))
+ .operationTime;
+
+ // Ensure that the secondary replicates the prepare and the additional insert.
+ rst.awaitReplication();
+
+ // Make sure a secondary read using afterClusterTime times out when trying to
+ // read a prepared document.
+ const secondaryDB = secondary.getDB(dbName);
+ assert.commandFailedWithCode(secondaryDB.runCommand({
+ find: collName,
+ filter: {_id: 1},
+ readConcern: {afterClusterTime: clusterTimeAfterPrepare},
+ maxTimeMS: 2 * 1000 // 2 seconds
+ }),
+ ErrorCodes.MaxTimeMSExpired);
+
+ // Clear secondary log so that when we wait for the WTPrintPrepareConflictLog fail point, we
+ // do not count the previous find.
+ assert.commandWorked(secondaryDB.adminCommand({clearLog: "global"}));
+
+ TestData.dbName = dbName;
+ TestData.collName = collName;
+ TestData.clusterTime = clusterTimeAfterPrepare;
+
+ const waitForSecondaryReadBlockedOnPrepareConflictThread = startParallelShell(() => {
+ // Allow for secondary reads.
+ db.getMongo().setSlaveOk();
+ const parallelTestDB = db.getSiblingDB(TestData.dbName);
+ const parallelTestCollName = TestData.collName;
+
+ // The following read should block on the prepared transaction since it will be
+ // reading a conflicting document using an afterClusterTime later than the
+ // prepareTimestamp.
+ assert.commandFailedWithCode(parallelTestDB.runCommand({
+ find: parallelTestCollName,
+ filter: {_id: 1},
+ readConcern: {afterClusterTime: TestData.clusterTime}
+ }),
+ ErrorCodes.InterruptedDueToReplStateChange);
+ }, secondary.port);
+
+ jsTestLog("Waiting for failpoint");
+ checkLog.contains(secondary, "WTPrintPrepareConflictLog fail point enabled");
+
+ // Once we've confirmed that the find command has hit a prepare conflict on the secondary, cause
+ // that secondary to step up.
+ jsTestLog("Stepping up secondary");
+ rst.stepUp(secondary);
+
+ waitForSecondaryReadBlockedOnPrepareConflictThread();
+
+ rst.waitForState(secondary, ReplSetTest.State.PRIMARY);
+ rst.waitForState(primary, ReplSetTest.State.SECONDARY);
+
+ primary = rst.getPrimary();
+
+ // Make sure we can successfully commit the prepared transaction.
+ jsTestLog("Restoring shell session state");
+ session = PrepareHelpers.createSessionWithGivenId(primary, sessionID);
+ sessionDB = session.getDatabase(dbName);
+ // The transaction on this session should have a txnNumber of 0. We explicitly set this
+ // since createSessionWithGivenId does not restore the current txnNumber in the shell.
+ session.setTxnNumber_forTesting(0);
+ const txnNumber = session.getTxnNumber_forTesting();
+
+ jsTestLog("Committing transaction");
+ // Commit the transaction.
+ assert.commandWorked(sessionDB.adminCommand({
+ commitTransaction: 1,
+ commitTimestamp: prepareTimestamp,
+ txnNumber: NumberLong(txnNumber),
+ autocommit: false,
+ }));
+
+ rst.stopSet();
+})();
\ No newline at end of file
diff --git a/src/mongo/db/prepare_conflict_tracker.h b/src/mongo/db/prepare_conflict_tracker.h
index e9deec22022..48e4fa1a063 100644
--- a/src/mongo/db/prepare_conflict_tracker.h
+++ b/src/mongo/db/prepare_conflict_tracker.h
@@ -36,11 +36,8 @@ namespace mongo {
/**
* The PrepareConflictTracker tracks if a read operation encounters a prepare conflict. If it
- * is blocked on a prepare conflict, we will kill the operation during step down. This will
- * help us avoid deadlocks between prepare conflicts and state transitions.
- *
- * TODO SERVER-41037: Modify above comment to include step up or use "state transitions" to
- * encompass both.
+ * is blocked on a prepare conflict, we will kill the operation during state transitions (step
+ * up/step down). This will help us avoid deadlocks between prepare conflicts and state transitions.
*/
class PrepareConflictTracker {
public:
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index dceb978a6f2..87bed913f8a 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1025,7 +1025,11 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx,
_externalState->onDrainComplete(opCtx);
- ReplicationStateTransitionLockGuard transitionGuard(opCtx, MODE_X);
+ // Kill all user writes and user reads that encounter a prepare conflict. Also kills select
+ // internal operations. Although secondaries cannot accept writes, a step up can kill writes
+ // that were blocked behind the RSTL lock held by a step down attempt. These writes will be
+ // killed with a retryable error code during step up.
+ AutoGetRstlForStepUpStepDown arsu(this, opCtx);
lk.lock();
// Exit drain mode only if we're actually in draining mode, the apply buffer is empty in the