diff options
author | Suganthi Mani <suganthi.mani@mongodb.com> | 2020-07-15 23:16:08 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-07-16 03:38:36 +0000 |
commit | 821acd4c97c24936a12af9d7b1a077714229ee3f (patch) | |
tree | f988bac723e71fea0e14a5ff45f77eb15a480ce5 | |
parent | 841fadf7603f11a4ea66217788c5cd763ca84a0e (diff) | |
download | mongo-821acd4c97c24936a12af9d7b1a077714229ee3f.tar.gz |
SERVER-49471 Retry on WT_ROLLBACK (WriteConflictException) when applying prepareTransaction oplog entry.
(cherry picked from commit 525a83552575efba3274bebb2d4f94b8554c5e16)
SERVER-46049 Have _applyOperationsForTransaction() return a BadStatus instead of throwing
(cherry picked from commit 6840394ee0a015939ac7b0497d27fbfe8dda71e4)
3 files changed, 123 insertions, 26 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 772495637a4..f52fd671078 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -61,6 +61,8 @@ replica_sets_multiversion: test_file: jstests/replsets/reconfig_uses_default_protocolVersion.js - ticket: SERVER-47190 test_file: jstests/replsets/force_shutdown_primary.js +- ticket: SERVER-49471 + test_file: jstests/replsets/apply_prepare_txn_write_conflict_robustness.js sharding_multiversion: - ticket: SERVER-38691 diff --git a/jstests/replsets/apply_prepare_txn_write_conflict_robustness.js b/jstests/replsets/apply_prepare_txn_write_conflict_robustness.js new file mode 100644 index 00000000000..62e98879d48 --- /dev/null +++ b/jstests/replsets/apply_prepare_txn_write_conflict_robustness.js @@ -0,0 +1,56 @@ +/* + * Tests that WT_ROLLBACK (WriteConflictException) error gets retried when applying + * prepareTransaction oplog entry on secondaries. + * + * @tags: [uses_transactions, uses_prepare_transaction] + */ +(function() { + +"use strict"; +load("jstests/core/txns/libs/prepare_helpers.js"); + +const dbName = jsTest.name(); +const collName = "coll"; + +var rst = new ReplSetTest({nodes: [{}, {rsConfig: {priority: 0}}]}); +rst.startSet(); +rst.initiate(); + +const primary = rst.getPrimary(); +const secondary = rst.getSecondary(); +const primaryDB = primary.getDB(dbName); +const primaryColl = primaryDB[collName]; + +jsTestLog("Do a document write"); +assert.commandWorked( + primaryColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}})); + +// Enable fail point on secondary to cause apply prepare transaction oplog entry's ops to fail +// with write conflict error at least once. +assert.commandWorked(secondary.adminCommand( + {configureFailPoint: "applyPrepareTxnOpsFailsWithWriteConflict", mode: {times: 1}})); + +jsTestLog("Start transaction"); +let session = primary.startSession(); +let sessionDB = session.getDatabase(dbName); +const sessionColl = sessionDB.getCollection(collName); +session.startTransaction({writeConcern: {w: "majority"}}); +assert.commandWorked(sessionColl.insert({_id: 1})); + +// PrepareTransaction cmd will be successful only if secondary is able to retry applying +// prepareTransaction oplog entry on WT_ROLLBACK (WriteConflictException) error. +jsTestLog("Prepare transaction"); +let prepareTimestamp = PrepareHelpers.prepareTransaction(session); + +jsTestLog("Commit transaction"); +assert.commandWorked(PrepareHelpers.commitTransaction(session, prepareTimestamp)); + +// Verify that the committed transaction data is present on secondary. +assert.eq(secondary.getDB(dbName)[collName].findOne({_id: 1}), {_id: 1}); + +// verify that secondaries are not holding any transactional lock resources. +primaryColl.drop(); +rst.awaitReplication(); + +rst.stopSet(); +})(); diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp index eb2a44af338..4edf468b8b0 100644 --- a/src/mongo/db/repl/transaction_oplog_application.cpp +++ b/src/mongo/db/repl/transaction_oplog_application.cpp @@ -56,11 +56,14 @@ MONGO_FAIL_POINT_DEFINE(applyOpsHangBeforePreparingTransaction); // Failpoint that will cause reconstructPreparedTransactions to return early. MONGO_FAIL_POINT_DEFINE(skipReconstructPreparedTransactions); +// Failpoint that causes apply prepare transaction oplog entry's ops to fail with write +// conflict error. +MONGO_FAIL_POINT_DEFINE(applyPrepareTxnOpsFailsWithWriteConflict); // Apply the oplog entries for a prepare or a prepared commit during recovery/initial sync. Status _applyOperationsForTransaction(OperationContext* opCtx, const repl::MultiApplier::Operations& ops, - repl::OplogApplication::Mode oplogApplicationMode) { + repl::OplogApplication::Mode oplogApplicationMode) noexcept { // Apply each the operations via repl::applyOperation. for (const auto& op : ops) { try { @@ -70,10 +73,21 @@ Status _applyOperationsForTransaction(OperationContext* opCtx, if (!status.isOK()) { return status; } - } catch (const ExceptionFor<ErrorCodes::NamespaceNotFound>&) { - if (oplogApplicationMode != repl::OplogApplication::Mode::kInitialSync && - oplogApplicationMode != repl::OplogApplication::Mode::kRecovering) - throw; + } catch (const DBException& ex) { + // Ignore NamespaceNotFound errors if we are in initial sync or recovering mode. + const bool ignoreException = ex.code() == ErrorCodes::NamespaceNotFound && + (oplogApplicationMode == repl::OplogApplication::Mode::kInitialSync || + oplogApplicationMode == repl::OplogApplication::Mode::kRecovering); + + if (!ignoreException) { + LOG(1) << "Error applying operation in transaction. " << redact(ex) + << "- oplog entry: " << redact(op.toBSON()); + return exceptionToStatus(); + } + LOG(1) << "Encountered but ignoring error: " << redact(ex) + << " while applying operations for transaction because we are either in initial " + "sync or recovering mode - oplog entry: " + << redact(op.toBSON()); } } return Status::OK(); @@ -342,33 +356,58 @@ Status _applyPrepareTransaction(OperationContext* opCtx, opCtx->setTxnNumber(*entry.getTxnNumber()); opCtx->setInMultiDocumentTransaction(); - // The write on transaction table may be applied concurrently, so refreshing state - // from disk may read that write, causing starting a new transaction on an existing - // txnNumber. Thus, we start a new transaction without refreshing state from disk. - MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx); + return writeConflictRetry(opCtx, "applying prepare transaction", entry.getNss().ns(), [&] { + // The write on transaction table may be applied concurrently, so refreshing state + // from disk may read that write, causing starting a new transaction on an existing + // txnNumber. Thus, we start a new transaction without refreshing state from disk. + MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx); + + auto txnParticipant = TransactionParticipant::get(opCtx); + + // Release the WUOW, transaction lock resources and abort storage transaction so that the + // writeConflictRetry loop will be able to retry applying transactional ops on WCE error. + auto abortOnError = makeGuard([&txnParticipant, opCtx] { + // Abort the transaction and invalidate the session it is associated with. + txnParticipant.abortTransaction(opCtx); + txnParticipant.invalidate(opCtx); + }); + + // Starts the WUOW. + txnParticipant.unstashTransactionResources(opCtx, "prepareTransaction"); + + // Set this in case the application of any ops need to use the prepare timestamp of this + // transaction. It should be cleared automatically when the transaction finishes. + if (mode == repl::OplogApplication::Mode::kRecovering) { + txnParticipant.setPrepareOpTimeForRecovery(opCtx, entry.getOpTime()); + } - auto transaction = TransactionParticipant::get(opCtx); - transaction.unstashTransactionResources(opCtx, "prepareTransaction"); + auto status = _applyOperationsForTransaction(opCtx, ops, mode); - // Set this in case the application of any ops need to use the prepare timestamp of this - // transaction. It should be cleared automatically when the transaction finishes. - if (mode == repl::OplogApplication::Mode::kRecovering) { - transaction.setPrepareOpTimeForRecovery(opCtx, entry.getOpTime()); - } + if (MONGO_FAIL_POINT(applyPrepareTxnOpsFailsWithWriteConflict)) { + LOG(0) << "Hit applyPrepareTxnOpsFailsWithWriteConflict failpoint"; + status = Status(ErrorCodes::WriteConflict, + "Prepare transaction apply ops failed due to write conflict"); + } - auto status = _applyOperationsForTransaction(opCtx, ops, mode); - fassert(31137, status); - if (MONGO_FAIL_POINT(applyOpsHangBeforePreparingTransaction)) { - LOG(0) << "Hit applyOpsHangBeforePreparingTransaction failpoint"; - MONGO_FAIL_POINT_PAUSE_WHILE_SET_OR_INTERRUPTED(opCtx, - applyOpsHangBeforePreparingTransaction); - } + if (status == ErrorCodes::WriteConflict) { + throw WriteConflictException(); + } + fassert(31137, status); - transaction.prepareTransaction(opCtx, entry.getOpTime()); - transaction.stashTransactionResources(opCtx); + if (MONGO_FAIL_POINT(applyOpsHangBeforePreparingTransaction)) { + LOG(0) << "Hit applyOpsHangBeforePreparingTransaction failpoint"; + MONGO_FAIL_POINT_PAUSE_WHILE_SET_OR_INTERRUPTED(opCtx, + applyOpsHangBeforePreparingTransaction); + } - return Status::OK(); + txnParticipant.prepareTransaction(opCtx, entry.getOpTime()); + // Prepare transaction success. + abortOnError.dismiss(); + + txnParticipant.stashTransactionResources(opCtx); + return Status::OK(); + }); } /** |