diff options
author | Suganthi Mani <suganthi.mani@mongodb.com> | 2020-07-14 08:40:28 -0400 |
---|---|---|
committer | Suganthi Mani <suganthi.mani@mongodb.com> | 2020-07-15 22:59:08 -0400 |
commit | 31b08e7d7e095132b7db3e8f4a4271478cf353ab (patch) | |
tree | eb2c9d6002449198f5e13b1d9ba85f3329ffb9a6 | |
parent | 5c8f1ef2c41f056b2a83d930dd7e8f8180b41224 (diff) | |
download | mongo-31b08e7d7e095132b7db3e8f4a4271478cf353ab.tar.gz |
SERVER-49471 Retry on WT_ROLLBACK (WriteConflictException) when applying prepareTransaction oplog entry.
(cherry picked from commit 525a83552575efba3274bebb2d4f94b8554c5e16)
4 files changed, 110 insertions, 23 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml b/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml index 912c5cf419a..555630879b9 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_multiversion.yml @@ -7,6 +7,7 @@ selector: - multiversion_incompatible - requires_fcv_44 exclude_files: + - jstests/replsets/apply_prepare_txn_write_conflict_robustness.js executor: config: shell_options: diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 21e3fa78231..b29b8647eff 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -71,6 +71,8 @@ replica_sets_multiversion: test_file: jstests/replsets/force_shutdown_primary.js - ticket: SERVER-47390 test_file: jstests/replsets/disallow_adding_initialized_node1.js +- ticket: SERVER-49471 + test_file: jstests/replsets/apply_prepare_txn_write_conflict_robustness.js sharding_multiversion: - ticket: SERVER-38691 diff --git a/jstests/replsets/apply_prepare_txn_write_conflict_robustness.js b/jstests/replsets/apply_prepare_txn_write_conflict_robustness.js new file mode 100644 index 00000000000..62e98879d48 --- /dev/null +++ b/jstests/replsets/apply_prepare_txn_write_conflict_robustness.js @@ -0,0 +1,56 @@ +/* + * Tests that WT_ROLLBACK (WriteConflictException) error gets retried when applying + * prepareTransaction oplog entry on secondaries. + * + * @tags: [uses_transactions, uses_prepare_transaction] + */ +(function() { + +"use strict"; +load("jstests/core/txns/libs/prepare_helpers.js"); + +const dbName = jsTest.name(); +const collName = "coll"; + +var rst = new ReplSetTest({nodes: [{}, {rsConfig: {priority: 0}}]}); +rst.startSet(); +rst.initiate(); + +const primary = rst.getPrimary(); +const secondary = rst.getSecondary(); +const primaryDB = primary.getDB(dbName); +const primaryColl = primaryDB[collName]; + +jsTestLog("Do a document write"); +assert.commandWorked( + primaryColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}})); + +// Enable fail point on secondary to cause apply prepare transaction oplog entry's ops to fail +// with write conflict error at least once. +assert.commandWorked(secondary.adminCommand( + {configureFailPoint: "applyPrepareTxnOpsFailsWithWriteConflict", mode: {times: 1}})); + +jsTestLog("Start transaction"); +let session = primary.startSession(); +let sessionDB = session.getDatabase(dbName); +const sessionColl = sessionDB.getCollection(collName); +session.startTransaction({writeConcern: {w: "majority"}}); +assert.commandWorked(sessionColl.insert({_id: 1})); + +// PrepareTransaction cmd will be successful only if secondary is able to retry applying +// prepareTransaction oplog entry on WT_ROLLBACK (WriteConflictException) error. +jsTestLog("Prepare transaction"); +let prepareTimestamp = PrepareHelpers.prepareTransaction(session); + +jsTestLog("Commit transaction"); +assert.commandWorked(PrepareHelpers.commitTransaction(session, prepareTimestamp)); + +// Verify that the committed transaction data is present on secondary. +assert.eq(secondary.getDB(dbName)[collName].findOne({_id: 1}), {_id: 1}); + +// verify that secondaries are not holding any transactional lock resources. +primaryColl.drop(); +rst.awaitReplication(); + +rst.stopSet(); +})(); diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp index 8608926919c..b4d824607eb 100644 --- a/src/mongo/db/repl/transaction_oplog_application.cpp +++ b/src/mongo/db/repl/transaction_oplog_application.cpp @@ -56,11 +56,14 @@ MONGO_FAIL_POINT_DEFINE(applyOpsHangBeforePreparingTransaction); // Failpoint that will cause reconstructPreparedTransactions to return early. MONGO_FAIL_POINT_DEFINE(skipReconstructPreparedTransactions); +// Failpoint that causes apply prepare transaction oplog entry's ops to fail with write +// conflict error. +MONGO_FAIL_POINT_DEFINE(applyPrepareTxnOpsFailsWithWriteConflict); // Apply the oplog entries for a prepare or a prepared commit during recovery/initial sync. Status _applyOperationsForTransaction(OperationContext* opCtx, const std::vector<OplogEntry>& ops, - repl::OplogApplication::Mode oplogApplicationMode) { + repl::OplogApplication::Mode oplogApplicationMode) noexcept { // Apply each the operations via repl::applyOperation. for (const auto& op : ops) { try { @@ -409,32 +412,57 @@ Status _applyPrepareTransaction(OperationContext* opCtx, opCtx->setLogicalSessionId(*entry.getSessionId()); opCtx->setTxnNumber(*entry.getTxnNumber()); opCtx->setInMultiDocumentTransaction(); - // The write on transaction table may be applied concurrently, so refreshing state - // from disk may read that write, causing starting a new transaction on an existing - // txnNumber. Thus, we start a new transaction without refreshing state from disk. - MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx); - - auto transaction = TransactionParticipant::get(opCtx); - transaction.unstashTransactionResources(opCtx, "prepareTransaction"); - - // Set this in case the application of any ops need to use the prepare timestamp of this - // transaction. It should be cleared automatically when the transaction finishes. - if (mode == repl::OplogApplication::Mode::kRecovering) { - transaction.setPrepareOpTimeForRecovery(opCtx, entry.getOpTime()); - } - auto status = _applyOperationsForTransaction(opCtx, ops, mode); - fassert(31137, status); + return writeConflictRetry(opCtx, "applying prepare transaction", entry.getNss().ns(), [&] { + // The write on transaction table may be applied concurrently, so refreshing state + // from disk may read that write, causing starting a new transaction on an existing + // txnNumber. Thus, we start a new transaction without refreshing state from disk. + MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx); + + auto txnParticipant = TransactionParticipant::get(opCtx); + + // Release the WUOW, transaction lock resources and abort storage transaction so that the + // writeConflictRetry loop will be able to retry applying transactional ops on WCE error. + auto abortOnError = makeGuard([&txnParticipant, opCtx] { + // Abort the transaction and invalidate the session it is associated with. + txnParticipant.abortTransaction(opCtx); + txnParticipant.invalidate(opCtx); + }); + + // Starts the WUOW. + txnParticipant.unstashTransactionResources(opCtx, "prepareTransaction"); + + // Set this in case the application of any ops need to use the prepare timestamp of this + // transaction. It should be cleared automatically when the transaction finishes. + if (mode == repl::OplogApplication::Mode::kRecovering) { + txnParticipant.setPrepareOpTimeForRecovery(opCtx, entry.getOpTime()); + } - if (MONGO_unlikely(applyOpsHangBeforePreparingTransaction.shouldFail())) { - LOGV2(21847, "Hit applyOpsHangBeforePreparingTransaction failpoint"); - applyOpsHangBeforePreparingTransaction.pauseWhileSet(opCtx); - } + auto status = _applyOperationsForTransaction(opCtx, ops, mode); - transaction.prepareTransaction(opCtx, entry.getOpTime()); - transaction.stashTransactionResources(opCtx); + if (MONGO_unlikely(applyPrepareTxnOpsFailsWithWriteConflict.shouldFail())) { + LOGV2(4947101, "Hit applyPrepareTxnOpsFailsWithWriteConflict failpoint"); + status = Status(ErrorCodes::WriteConflict, + "Prepare transaction apply ops failed due to write conflict"); + } - return Status::OK(); + if (status == ErrorCodes::WriteConflict) { + throw WriteConflictException(); + } + fassert(31137, status); + + if (MONGO_unlikely(applyOpsHangBeforePreparingTransaction.shouldFail())) { + LOGV2(21847, "Hit applyOpsHangBeforePreparingTransaction failpoint"); + applyOpsHangBeforePreparingTransaction.pauseWhileSet(opCtx); + } + + txnParticipant.prepareTransaction(opCtx, entry.getOpTime()); + // Prepare transaction success. + abortOnError.dismiss(); + + txnParticipant.stashTransactionResources(opCtx); + return Status::OK(); + }); } /** |