summary refs log tree commit diff
diff options
context:
space:
mode:
author Suganthi Mani <suganthi.mani@mongodb.com> 2020-07-15 21:50:28 -0400
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-07-16 02:12:46 +0000
commit 7c23f84809b9d28dd54dbf8fc514568238bb0020 (patch)
tree 9b56d3edb8a1574c58fabbf44f19c6844c481dee
parent 02e0fc0714f54e4a53895699411c3d19f776dffe (diff)
download mongo-7c23f84809b9d28dd54dbf8fc514568238bb0020.tar.gz
SERVER-49471 Retry on WT_ROLLBACK (WriteConflictException) when applying prepareTransaction oplog entry.
(cherry picked from commit 525a83552575efba3274bebb2d4f94b8554c5e16)

SERVER-46049 Have _applyOperationsForTransaction() return a BadStatus instead of throwing
(cherry picked from commit 6840394ee0a015939ac7b0497d27fbfe8dda71e4)
-rw-r--r-- etc/backports_required_for_multiversion_tests.yml 4
-rw-r--r-- jstests/replsets/apply_prepare_txn_write_conflict_robustness.js 56
-rw-r--r-- src/mongo/db/repl/transaction_oplog_application.cpp 91
3 files changed, 125 insertions, 26 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 772495637a4..8e586b3b98a 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -26,6 +26,10 @@ concurrency_replication_multiversion:
concurrency_sharded_replication_multiversion:
+replica_sets_multiversion:
+- ticket: SERVER-49471
+ test_file: jstests/replsets/apply_prepare_txn_write_conflict_robustness.js
+
replica_sets_jscore_multiversion_passthrough:
- ticket: SERVER-44260
test_file: jstests/core/txns/new_transaction_waits_for_previous_txn_table_updates.js
diff --git a/jstests/replsets/apply_prepare_txn_write_conflict_robustness.js b/jstests/replsets/apply_prepare_txn_write_conflict_robustness.js
new file mode 100644
index 00000000000..62e98879d48
--- /dev/null
+++ b/jstests/replsets/apply_prepare_txn_write_conflict_robustness.js
@@ -0,0 +1,56 @@
+/*
+ * Tests that a WT_ROLLBACK (WriteConflictException) error is retried when a secondary applies a
+ * prepareTransaction oplog entry. The conflict is injected once through a fail point.
+ *
+ * @tags: [uses_transactions, uses_prepare_transaction]
+ */
+(function() {
+
+"use strict";
+load("jstests/core/txns/libs/prepare_helpers.js");
+
+const dbName = jsTest.name();
+const collName = "coll";
+
+var rst = new ReplSetTest({nodes: [{}, {rsConfig: {priority: 0}}]});
+rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+const secondary = rst.getSecondary();
+const primaryDB = primary.getDB(dbName);
+const primaryColl = primaryDB[collName];
+
+jsTestLog("Do a document write");
+assert.commandWorked(
+        primaryColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}}));
+
+// Enable the fail point on the secondary so that applying the ops of a prepareTransaction oplog
+// entry fails with a write conflict error once (mode: {times: 1}).
+assert.commandWorked(secondary.adminCommand(
+ {configureFailPoint: "applyPrepareTxnOpsFailsWithWriteConflict", mode: {times: 1}}));
+
+jsTestLog("Start transaction");
+let session = primary.startSession();
+let sessionDB = session.getDatabase(dbName);
+const sessionColl = sessionDB.getCollection(collName);
+session.startTransaction({writeConcern: {w: "majority"}});
+assert.commandWorked(sessionColl.insert({_id: 1}));
+
+// The prepareTransaction command succeeds only if the secondary is able to retry applying the
+// prepareTransaction oplog entry after a WT_ROLLBACK (WriteConflictException) error.
+jsTestLog("Prepare transaction");
+let prepareTimestamp = PrepareHelpers.prepareTransaction(session);
+
+jsTestLog("Commit transaction");
+assert.commandWorked(PrepareHelpers.commitTransaction(session, prepareTimestamp));
+
+// Verify that the committed transaction data is present on the secondary.
+assert.eq(secondary.getDB(dbName)[collName].findOne({_id: 1}), {_id: 1});
+
+// Verify that secondaries are not holding any transactional lock resources (the drop must replicate).
+primaryColl.drop();
+rst.awaitReplication();
+
+rst.stopSet();
+})();
diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp
index eb2a44af338..4edf468b8b0 100644
--- a/src/mongo/db/repl/transaction_oplog_application.cpp
+++ b/src/mongo/db/repl/transaction_oplog_application.cpp
@@ -56,11 +56,14 @@ MONGO_FAIL_POINT_DEFINE(applyOpsHangBeforePreparingTransaction);
// Failpoint that will cause reconstructPreparedTransactions to return early.
MONGO_FAIL_POINT_DEFINE(skipReconstructPreparedTransactions);
+// Failpoint that causes applying the ops of a prepareTransaction oplog entry to fail with a
+// write conflict error.
+MONGO_FAIL_POINT_DEFINE(applyPrepareTxnOpsFailsWithWriteConflict);
// Apply the oplog entries for a prepare or a prepared commit during recovery/initial sync.
Status _applyOperationsForTransaction(OperationContext* opCtx,
const repl::MultiApplier::Operations& ops,
- repl::OplogApplication::Mode oplogApplicationMode) {
+ repl::OplogApplication::Mode oplogApplicationMode) noexcept {
// Apply each the operations via repl::applyOperation.
for (const auto& op : ops) {
try {
@@ -70,10 +73,21 @@ Status _applyOperationsForTransaction(OperationContext* opCtx,
if (!status.isOK()) {
return status;
}
- } catch (const ExceptionFor<ErrorCodes::NamespaceNotFound>&) {
- if (oplogApplicationMode != repl::OplogApplication::Mode::kInitialSync &&
- oplogApplicationMode != repl::OplogApplication::Mode::kRecovering)
- throw;
+ } catch (const DBException& ex) {
+ // Ignore NamespaceNotFound errors if we are in initial sync or recovering mode.
+ const bool ignoreException = ex.code() == ErrorCodes::NamespaceNotFound &&
+ (oplogApplicationMode == repl::OplogApplication::Mode::kInitialSync ||
+ oplogApplicationMode == repl::OplogApplication::Mode::kRecovering);
+
+ if (!ignoreException) {
+ LOG(1) << "Error applying operation in transaction. " << redact(ex)
+ << "- oplog entry: " << redact(op.toBSON());
+ return exceptionToStatus();
+ }
+ LOG(1) << "Encountered but ignoring error: " << redact(ex)
+ << " while applying operations for transaction because we are either in initial "
+ "sync or recovering mode - oplog entry: "
+ << redact(op.toBSON());
}
}
return Status::OK();
@@ -342,33 +356,58 @@ Status _applyPrepareTransaction(OperationContext* opCtx,
opCtx->setTxnNumber(*entry.getTxnNumber());
opCtx->setInMultiDocumentTransaction();
- // The write on transaction table may be applied concurrently, so refreshing state
- // from disk may read that write, causing starting a new transaction on an existing
- // txnNumber. Thus, we start a new transaction without refreshing state from disk.
- MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx);
+ return writeConflictRetry(opCtx, "applying prepare transaction", entry.getNss().ns(), [&] {
+ // The write on transaction table may be applied concurrently, so refreshing state
+ // from disk may read that write, causing starting a new transaction on an existing
+ // txnNumber. Thus, we start a new transaction without refreshing state from disk.
+ MongoDOperationContextSessionWithoutRefresh sessionCheckout(opCtx);
+
+ auto txnParticipant = TransactionParticipant::get(opCtx);
+
+ // Release the WUOW, transaction lock resources and abort storage transaction so that the
+ // writeConflictRetry loop will be able to retry applying transactional ops on WCE error.
+ auto abortOnError = makeGuard([&txnParticipant, opCtx] {
+ // Abort the transaction and invalidate the session it is associated with.
+ txnParticipant.abortTransaction(opCtx);
+ txnParticipant.invalidate(opCtx);
+ });
+
+ // Starts the WUOW.
+ txnParticipant.unstashTransactionResources(opCtx, "prepareTransaction");
+
+ // Set this in case the application of any ops need to use the prepare timestamp of this
+ // transaction. It should be cleared automatically when the transaction finishes.
+ if (mode == repl::OplogApplication::Mode::kRecovering) {
+ txnParticipant.setPrepareOpTimeForRecovery(opCtx, entry.getOpTime());
+ }
- auto transaction = TransactionParticipant::get(opCtx);
- transaction.unstashTransactionResources(opCtx, "prepareTransaction");
+ auto status = _applyOperationsForTransaction(opCtx, ops, mode);
- // Set this in case the application of any ops need to use the prepare timestamp of this
- // transaction. It should be cleared automatically when the transaction finishes.
- if (mode == repl::OplogApplication::Mode::kRecovering) {
- transaction.setPrepareOpTimeForRecovery(opCtx, entry.getOpTime());
- }
+ if (MONGO_FAIL_POINT(applyPrepareTxnOpsFailsWithWriteConflict)) {
+ LOG(0) << "Hit applyPrepareTxnOpsFailsWithWriteConflict failpoint";
+ status = Status(ErrorCodes::WriteConflict,
+ "Prepare transaction apply ops failed due to write conflict");
+ }
- auto status = _applyOperationsForTransaction(opCtx, ops, mode);
- fassert(31137, status);
- if (MONGO_FAIL_POINT(applyOpsHangBeforePreparingTransaction)) {
- LOG(0) << "Hit applyOpsHangBeforePreparingTransaction failpoint";
- MONGO_FAIL_POINT_PAUSE_WHILE_SET_OR_INTERRUPTED(opCtx,
- applyOpsHangBeforePreparingTransaction);
- }
+ if (status == ErrorCodes::WriteConflict) {
+ throw WriteConflictException();
+ }
+ fassert(31137, status);
- transaction.prepareTransaction(opCtx, entry.getOpTime());
- transaction.stashTransactionResources(opCtx);
+ if (MONGO_FAIL_POINT(applyOpsHangBeforePreparingTransaction)) {
+ LOG(0) << "Hit applyOpsHangBeforePreparingTransaction failpoint";
+ MONGO_FAIL_POINT_PAUSE_WHILE_SET_OR_INTERRUPTED(opCtx,
+ applyOpsHangBeforePreparingTransaction);
+ }
- return Status::OK();
+ txnParticipant.prepareTransaction(opCtx, entry.getOpTime());
+ // Prepare transaction success.
+ abortOnError.dismiss();
+
+ txnParticipant.stashTransactionResources(opCtx);
+ return Status::OK();
+ });
}
/**