summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Samy Lanka <samy.lanka@mongodb.com> 2020-12-16 03:46:18 +0000
committer Samy Lanka <samy.lanka@mongodb.com> 2021-04-23 15:05:07 +0000
commit a230371af696ff2eaf17c1937fb0ca62dab476d3 (patch)
tree 62c902424ab8a131e01a08238119c24d5db49c47
parent 93d51f181e657a283d3ca1acb81e7e32f0eac633 (diff)
download mongo-a230371af696ff2eaf17c1937fb0ca62dab476d3.tar.gz
SERVER-50486 Always interrupt multi-document transactions on step down or step up
(cherry picked from commit 5e9d3327d5d08288a932ee77db3be4eb0d45c9c8)
-rw-r--r-- etc/backports_required_for_multiversion_tests.yml 2
-rw-r--r-- jstests/replsets/dont_refresh_session_prepare_secondary.js 116
-rw-r--r-- src/mongo/db/repl/transaction_oplog_application.cpp 3
-rw-r--r-- src/mongo/db/service_entry_point_common.cpp 9
4 files changed, 130 insertions, 0 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 1fe400bfd16..ed76885441c 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -125,6 +125,8 @@ all:
test_file: jstests/replsets/not_primary_errors_returned_during_rollback_if_helloOk.js
- ticket: SERVER-48179
test_file: jstests/replsets/reconfig_removes_node_in_rollback.js
+ - ticket: SERVER-50486
+ test_file: jstests/replsets/dont_refresh_session_prepare_secondary.js
suites:
diff --git a/jstests/replsets/dont_refresh_session_prepare_secondary.js b/jstests/replsets/dont_refresh_session_prepare_secondary.js
new file mode 100644
index 00000000000..6dd621b1904
--- /dev/null
+++ b/jstests/replsets/dont_refresh_session_prepare_secondary.js
@@ -0,0 +1,116 @@
+/**
+ * Tests session invalidation and checking out a session without refresh on a new secondary.
+ *
+ * Tests this by:
+ * 1. Starting with a primary that is running a transaction. We will hang the primary before it
+ * checks out the session for the transaction.
+ * 2. Step up another node and prepare a transaction on the same session used for the transaction on
+ * the old primary. This should cause the old primary to step down, invalidating the relevant
+ * session.
+ * 3. When the old primary replicates the prepared transaction, wait so that the update to the
+ * config.transactions table for the prepared transaction happens before the node prepares the
+ * transaction. Even though the session is still invalidated, applying the prepare should check
+ * out the session without refreshing from disk.
+ *
+ * See SERVER-50486 for more details.
+ *
+ * @tags: [uses_transactions, uses_prepare_transaction]
+ */
+(function() {
+"use strict";
+
+load("jstests/core/txns/libs/prepare_helpers.js");
+load("jstests/libs/parallel_shell_helpers.js");
+load("jstests/libs/fail_point_util.js");
+
+const replTest = new ReplSetTest({nodes: 2});
+replTest.startSet();
+replTest.initiate();
+
+const dbName = "test";
+const collName = "coll";
+const primary = replTest.getPrimary();
+const newPrimary = replTest.getSecondary();
+
+const testDB = primary.getDB(dbName);
+testDB.dropDatabase();
+assert.commandWorked(testDB.runCommand({create: collName, writeConcern: {w: "majority"}}));
+
+const session = primary.startSession({causalConsistency: false});
+const sessionID = session.getSessionId();
+
+let failPoint = configureFailPoint(primary, "hangBeforeSessionCheckOut");
+
+const txnFunc = function(sessionID) {
+ load("jstests/core/txns/libs/prepare_helpers.js");
+ const session = PrepareHelpers.createSessionWithGivenId(db.getMongo(), sessionID);
+ const sessionDB = session.getDatabase("test");
+ session.startTransaction({writeConcern: {w: "majority"}});
+ assert.commandFailedWithCode(
+ sessionDB.runCommand({find: "test", readConcern: {level: "snapshot"}}),
+ ErrorCodes.InterruptedDueToReplStateChange);
+};
+const waitForTxnShell = startParallelShell(funWithArgs(txnFunc, sessionID), primary.port);
+failPoint.wait();
+
+replTest.stepUp(newPrimary);
+assert.eq(replTest.getPrimary(), newPrimary, "Primary didn't change.");
+
+const prepareTxnFunc = function(sessionID) {
+ load("jstests/core/txns/libs/prepare_helpers.js");
+ const newPrimaryDB = db.getMongo().getDB("test");
+
+ // Start a transaction on the same session as before, but with a higher transaction number.
+ assert.commandWorked(newPrimaryDB.runCommand({
+ insert: "coll",
+ documents: [{c: 1}],
+ lsid: sessionID,
+ txnNumber: NumberLong(10),
+ startTransaction: true,
+ autocommit: false
+ }));
+ assert.commandWorked(newPrimaryDB.adminCommand({
+ prepareTransaction: 1,
+ lsid: sessionID,
+ txnNumber: NumberLong(10),
+ autocommit: false,
+ writeConcern: {w: "majority"}
+ }));
+};
+
+let applyFailPoint = configureFailPoint(primary, "hangBeforeSessionCheckOutForApplyPrepare");
+const waitForPrepareTxnShell =
+ startParallelShell(funWithArgs(prepareTxnFunc, sessionID), newPrimary.port);
+applyFailPoint.wait();
+
+// Wait so that the update to the config.transactions table from the newly prepared transaction
+// happens before the user transaction checks out the session. Otherwise, we won't see the
+// transaction state as being "Prepared" when refreshing the session from storage.
+sleep(10000);
+
+failPoint.off();
+
+// Wait so that the user transaction checks out the session before the thread applying the
+// prepareTransaction is unpaused. Otherwise, applying the prepareTransaction will make the session
+// valid.
+sleep(10000);
+
+applyFailPoint.off();
+
+waitForPrepareTxnShell();
+waitForTxnShell();
+
+let newPrimaryDB = replTest.getPrimary().getDB("test");
+const commitTimestamp =
+ assert.commandWorked(newPrimaryDB.runCommand({insert: collName, documents: [{}]})).opTime.ts;
+
+assert.commandWorked(newPrimaryDB.adminCommand({
+ commitTransaction: 1,
+ commitTimestamp: commitTimestamp,
+ lsid: sessionID,
+ txnNumber: NumberLong(10),
+ autocommit: false
+}));
+
+replTest.stopSet();
+})();
diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp
index 0a665feccc8..f458cc951ae 100644
--- a/src/mongo/db/repl/transaction_oplog_application.cpp
+++ b/src/mongo/db/repl/transaction_oplog_application.cpp
@@ -60,6 +60,8 @@ MONGO_FAIL_POINT_DEFINE(skipReconstructPreparedTransactions);
// conflict error.
MONGO_FAIL_POINT_DEFINE(applyPrepareTxnOpsFailsWithWriteConflict);
+MONGO_FAIL_POINT_DEFINE(hangBeforeSessionCheckOutForApplyPrepare);
+
// Apply the oplog entries for a prepare or a prepared commit during recovery/initial sync.
Status _applyOperationsForTransaction(OperationContext* opCtx,
const std::vector<OplogEntry>& ops,
@@ -418,6 +420,7 @@ Status _applyPrepareTransaction(OperationContext* opCtx,
// The write on transaction table may be applied concurrently, so refreshing state
// from disk may read that write, causing starting a new transaction on an existing
// txnNumber. Thus, we start a new transaction without refreshing state from disk.
+ hangBeforeSessionCheckOutForApplyPrepare.pauseWhileSet();
MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx);
auto txnParticipant = TransactionParticipant::get(opCtx);
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index a1565c9d7e9..3cdb236ad02 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -112,6 +112,7 @@ MONGO_FAIL_POINT_DEFINE(sleepMillisAfterCommandExecutionBegins);
MONGO_FAIL_POINT_DEFINE(waitAfterNewStatementBlocksBehindPrepare);
MONGO_FAIL_POINT_DEFINE(waitAfterCommandFinishesExecution);
MONGO_FAIL_POINT_DEFINE(failWithErrorCodeInRunCommand);
+MONGO_FAIL_POINT_DEFINE(hangBeforeSessionCheckOut);
// Tracks the number of times a legacy unacknowledged write failed due to
// not primary error resulted in network disconnection.
@@ -556,6 +557,7 @@ void invokeWithSessionCheckedOut(OperationContext* opCtx,
// This constructor will check out the session. It handles the appropriate state management
// for both multi-statement transactions and retryable writes. Currently, only requests with
// a transaction number will check out the session.
+ hangBeforeSessionCheckOut.pauseWhileSet();
MongoDOperationContextSession sessionTxnState(opCtx);
auto txnParticipant = TransactionParticipant::get(opCtx);
@@ -1030,6 +1032,13 @@ void execCommandDatabase(OperationContext* opCtx,
if (!opCtx->getClient()->isInDirectClient() &&
!MONGO_unlikely(skipCheckingForNotPrimaryInCommandDispatch.shouldFail())) {
const bool inMultiDocumentTransaction = (sessionOptions.getAutocommit() == false);
+
+ // Kill this operation on step down even if it hasn't taken write locks yet, because it
+ // could conflict with transactions from a new primary.
+ if (inMultiDocumentTransaction) {
+ opCtx->setAlwaysInterruptAtStepDownOrUp();
+ }
+
auto allowed = command->secondaryAllowed(opCtx->getServiceContext());
bool alwaysAllowed = allowed == Command::AllowedOnSecondary::kAlways;
bool couldHaveOptedIn =