summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Suganthi Mani <suganthi.mani@mongodb.com> 2020-07-14 08:40:28 -0400
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-07-15 23:59:45 +0000
commit 525a83552575efba3274bebb2d4f94b8554c5e16 (patch)
tree d9e0de23272f5f90cc73cd99a3b1a30d3dc934d2
parent f1c2d6c29d960506c770958ed39ebe0677a3fdda (diff)
download mongo-525a83552575efba3274bebb2d4f94b8554c5e16.tar.gz
SERVER-49471 Retry on WT_ROLLBACK (WriteConflictException) when applying prepareTransaction oplog entry.
-rw-r--r-- buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml 1
-rw-r--r-- etc/backports_required_for_multiversion_tests.yml 4
-rw-r--r-- jstests/replsets/apply_prepare_txn_write_conflict_robustness.js 56
-rw-r--r-- src/mongo/db/repl/transaction_oplog_application.cpp 74
4 files changed, 112 insertions, 23 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml b/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml
index 87577f23687..5d1c1628203 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml
@@ -10,6 +10,7 @@ selector:
- jstests/replsets/initial_sync_rename_collection.js
- jstests/replsets/initial_sync_drop_collection.js
- jstests/replsets/step_down_chaining_disabled.js
+ - jstests/replsets/apply_prepare_txn_write_conflict_robustness.js
executor:
config:
shell_options:
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 8a72b94f655..c21d8a6e04c 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -33,6 +33,10 @@ concurrency_replication_multiversion:
concurrency_sharded_replication_multiversion:
+replica_sets_multiversion:
+- ticket: SERVER-49471
+ test_file: jstests/replsets/apply_prepare_txn_write_conflict_robustness.js
+
replica_sets_jscore_multiversion_passthrough:
- ticket: SERVER-47773
test_file: jstests/core/geo_near_tailable.js
diff --git a/jstests/replsets/apply_prepare_txn_write_conflict_robustness.js b/jstests/replsets/apply_prepare_txn_write_conflict_robustness.js
new file mode 100644
index 00000000000..62e98879d48
--- /dev/null
+++ b/jstests/replsets/apply_prepare_txn_write_conflict_robustness.js
@@ -0,0 +1,56 @@
+/*
+ * Tests that WT_ROLLBACK (WriteConflictException) error gets retried when applying
+ * prepareTransaction oplog entry on secondaries.
+ *
+ * @tags: [uses_transactions, uses_prepare_transaction]
+ */
+(function() {
+
+"use strict";
+load("jstests/core/txns/libs/prepare_helpers.js");
+
+const dbName = jsTest.name();
+const collName = "coll";
+
+var rst = new ReplSetTest({nodes: [{}, {rsConfig: {priority: 0}}]});
+rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+const secondary = rst.getSecondary();
+const primaryDB = primary.getDB(dbName);
+const primaryColl = primaryDB[collName];
+
+jsTestLog("Do a document write");
+assert.commandWorked(
+        primaryColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}}));
+
+// Enable a fail point on the secondary so that applying the prepareTransaction oplog entry's
+// ops fails with a write conflict error at least once.
+assert.commandWorked(secondary.adminCommand(
+ {configureFailPoint: "applyPrepareTxnOpsFailsWithWriteConflict", mode: {times: 1}}));
+
+jsTestLog("Start transaction");
+let session = primary.startSession();
+let sessionDB = session.getDatabase(dbName);
+const sessionColl = sessionDB.getCollection(collName);
+session.startTransaction({writeConcern: {w: "majority"}});
+assert.commandWorked(sessionColl.insert({_id: 1}));
+
+// PrepareTransaction cmd will be successful only if secondary is able to retry applying
+// prepareTransaction oplog entry on WT_ROLLBACK (WriteConflictException) error.
+jsTestLog("Prepare transaction");
+let prepareTimestamp = PrepareHelpers.prepareTransaction(session);
+
+jsTestLog("Commit transaction");
+assert.commandWorked(PrepareHelpers.commitTransaction(session, prepareTimestamp));
+
+// Verify that the committed transaction data is present on secondary.
+assert.eq(secondary.getDB(dbName)[collName].findOne({_id: 1}), {_id: 1});
+
+// Verify that secondaries hold no transactional lock resources: the drop below must replicate.
+primaryColl.drop();
+rst.awaitReplication();
+
+rst.stopSet();
+})();
diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp
index c500deb93ac..9c1ccebdc01 100644
--- a/src/mongo/db/repl/transaction_oplog_application.cpp
+++ b/src/mongo/db/repl/transaction_oplog_application.cpp
@@ -55,11 +55,14 @@ MONGO_FAIL_POINT_DEFINE(applyOpsHangBeforePreparingTransaction);
// Failpoint that will cause reconstructPreparedTransactions to return early.
MONGO_FAIL_POINT_DEFINE(skipReconstructPreparedTransactions);
+// Failpoint that causes applying the ops of a prepareTransaction oplog entry to fail with a
+// write conflict error.
+MONGO_FAIL_POINT_DEFINE(applyPrepareTxnOpsFailsWithWriteConflict);
// Apply the oplog entries for a prepare or a prepared commit during recovery/initial sync.
Status _applyOperationsForTransaction(OperationContext* opCtx,
const std::vector<OplogEntry>& ops,
- repl::OplogApplication::Mode oplogApplicationMode) {
+ repl::OplogApplication::Mode oplogApplicationMode) noexcept {
// Apply each of the operations via repl::applyOperation.
for (const auto& op : ops) {
try {
@@ -408,32 +411,57 @@ Status _applyPrepareTransaction(OperationContext* opCtx,
opCtx->setLogicalSessionId(*entry.getSessionId());
opCtx->setTxnNumber(*entry.getTxnNumber());
opCtx->setInMultiDocumentTransaction();
- // The write on transaction table may be applied concurrently, so refreshing state
- // from disk may read that write, causing starting a new transaction on an existing
- // txnNumber. Thus, we start a new transaction without refreshing state from disk.
- MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx);
-
- auto transaction = TransactionParticipant::get(opCtx);
- transaction.unstashTransactionResources(opCtx, "prepareTransaction");
-
- // Set this in case the application of any ops need to use the prepare timestamp of this
- // transaction. It should be cleared automatically when the transaction finishes.
- if (mode == repl::OplogApplication::Mode::kRecovering) {
- transaction.setPrepareOpTimeForRecovery(opCtx, entry.getOpTime());
- }
- auto status = _applyOperationsForTransaction(opCtx, ops, mode);
- fassert(31137, status);
+ return writeConflictRetry(opCtx, "applying prepare transaction", entry.getNss().ns(), [&] {
+ // The write on transaction table may be applied concurrently, so refreshing state
+ // from disk may read that write, causing starting a new transaction on an existing
+ // txnNumber. Thus, we start a new transaction without refreshing state from disk.
+ MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx);
+
+ auto txnParticipant = TransactionParticipant::get(opCtx);
+
+ // Release the WUOW, transaction lock resources and abort storage transaction so that the
+ // writeConflictRetry loop will be able to retry applying transactional ops on WCE error.
+ auto abortOnError = makeGuard([&txnParticipant, opCtx] {
+ // Abort the transaction and invalidate the session it is associated with.
+ txnParticipant.abortTransaction(opCtx);
+ txnParticipant.invalidate(opCtx);
+ });
+
+ // Starts the WUOW.
+ txnParticipant.unstashTransactionResources(opCtx, "prepareTransaction");
+
+ // Set this in case the application of any ops need to use the prepare timestamp of this
+ // transaction. It should be cleared automatically when the transaction finishes.
+ if (mode == repl::OplogApplication::Mode::kRecovering) {
+ txnParticipant.setPrepareOpTimeForRecovery(opCtx, entry.getOpTime());
+ }
- if (MONGO_unlikely(applyOpsHangBeforePreparingTransaction.shouldFail())) {
- LOGV2(21847, "Hit applyOpsHangBeforePreparingTransaction failpoint");
- applyOpsHangBeforePreparingTransaction.pauseWhileSet(opCtx);
- }
+ auto status = _applyOperationsForTransaction(opCtx, ops, mode);
- transaction.prepareTransaction(opCtx, entry.getOpTime());
- transaction.stashTransactionResources(opCtx);
+ if (MONGO_unlikely(applyPrepareTxnOpsFailsWithWriteConflict.shouldFail())) {
+ LOGV2(4947101, "Hit applyPrepareTxnOpsFailsWithWriteConflict failpoint");
+ status = Status(ErrorCodes::WriteConflict,
+ "Prepare transaction apply ops failed due to write conflict");
+ }
- return Status::OK();
+ if (status == ErrorCodes::WriteConflict) {
+ throw WriteConflictException();
+ }
+ fassert(31137, status);
+
+ if (MONGO_unlikely(applyOpsHangBeforePreparingTransaction.shouldFail())) {
+ LOGV2(21847, "Hit applyOpsHangBeforePreparingTransaction failpoint");
+ applyOpsHangBeforePreparingTransaction.pauseWhileSet(opCtx);
+ }
+
+ txnParticipant.prepareTransaction(opCtx, entry.getOpTime());
+ // Prepare transaction success.
+ abortOnError.dismiss();
+
+ txnParticipant.stashTransactionResources(opCtx);
+ return Status::OK();
+ });
}
/**