diff options
author | Siyuan Zhou <siyuan.zhou@mongodb.com> | 2019-01-21 19:11:41 -0500 |
---|---|---|
committer | Siyuan Zhou <siyuan.zhou@mongodb.com> | 2019-01-24 22:49:49 -0500 |
commit | d1933f0b1b88c4c2ad0f2fdd6ab106f7eeacddfa (patch) | |
tree | f28cbc8eb44fa4363c9824e0e772014c0306f433 | |
parent | ec4520d72b2a4ff6ba980e913c988b11e7d188a4 (diff) | |
download | mongo-d1933f0b1b88c4c2ad0f2fdd6ab106f7eeacddfa.tar.gz |
SERVER-38282 Yield locks for prepared transactions on stepdown.
-rw-r--r-- | jstests/core/txns/libs/prepare_helpers.js | 2 | ||||
-rw-r--r-- | jstests/replsets/prepared_transaction_on_failover.js | 91 | ||||
-rw-r--r-- | src/mongo/db/kill_sessions_local.cpp | 32 | ||||
-rw-r--r-- | src/mongo/db/kill_sessions_local.h | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.h | 15 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/session_catalog_mongod.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/transaction_participant.cpp | 7 |
9 files changed, 162 insertions, 3 deletions
diff --git a/jstests/core/txns/libs/prepare_helpers.js b/jstests/core/txns/libs/prepare_helpers.js index 56927af8b56..d157fc351ce 100644 --- a/jstests/core/txns/libs/prepare_helpers.js +++ b/jstests/core/txns/libs/prepare_helpers.js @@ -40,7 +40,7 @@ const PrepareHelpers = (function() { // End the transaction on the shell session. if (res.ok) { - session.commitTransaction(); + session.commitTransaction_forTesting(); } else { session.abortTransaction_forTesting(); } diff --git a/jstests/replsets/prepared_transaction_on_failover.js b/jstests/replsets/prepared_transaction_on_failover.js new file mode 100644 index 00000000000..faec21bc52b --- /dev/null +++ b/jstests/replsets/prepared_transaction_on_failover.js @@ -0,0 +1,91 @@ +/** + * Tests prepared transactions can survive failover and commit on a new primary. + * + * @tags: [uses_transactions, uses_prepare_transaction] + */ +(function() { + "use strict"; + load("jstests/core/txns/libs/prepare_helpers.js"); + load("jstests/replsets/rslib.js"); // For reconnect() + + const replTest = new ReplSetTest({nodes: 2}); + replTest.startSet(); + replTest.initiate(); + + const dbName = jsTest.name(); + const collName = "coll"; + const otherDbName = dbName + "_other"; + + function testTransactionsWithFailover(stepDownFunction) { + const primary = replTest.getPrimary(); + const newPrimary = replTest.getSecondary(); + const testDB = primary.getDB(dbName); + + testDB.dropDatabase(); + testDB.getSiblingDB(otherDbName).dropDatabase(); + assert.commandWorked(testDB.runCommand({create: collName, writeConcern: {w: "majority"}})); + + jsTestLog("Starting transaction"); + const session = primary.startSession({causalConsistency: false}); + const sessionDB = session.getDatabase(dbName); + session.startTransaction({writeConcern: {w: "majority"}}); + + const doc = {_id: "txn on primary " + primary}; + assert.commandWorked(sessionDB.getCollection(collName).insert(doc)); + + jsTestLog("Putting transaction into prepare"); + const prepareTimestamp = PrepareHelpers.prepareTransaction(session); + replTest.awaitReplication(); + + stepDownFunction(); + reconnect(primary); + + jsTestLog("Waiting for the other node to run for election and become primary"); + assert.eq(replTest.getPrimary(), newPrimary); + + jsTestLog("Creating an unrelated collection"); + // Application of an unrelated DDL command needs a strong lock on secondary. Make sure + // the prepared transactions have yielded their locks on secondary. + assert.commandWorked(newPrimary.getDB(otherDbName).runCommand({create: collName})); + replTest.awaitReplication(); + + jsTestLog("Dropping the collection in use cannot acquire the lock"); + assert.commandFailedWithCode( + newPrimary.getDB(testDB).runCommand({drop: collName, maxTimeMS: 1000}), + ErrorCodes.MaxTimeMSExpired); + + jsTestLog("Committing transaction on the new primary"); + // Create a proxy session to reuse the session state of the old primary. + const newSession = new _DelegatingDriverSession(newPrimary, session); + + assert.commandWorked( + PrepareHelpers.commitTransactionAfterPrepareTS(newSession, prepareTimestamp)); + replTest.awaitReplication(); + + assert.docEq(doc, testDB.getCollection(collName).findOne()); + assert.docEq(doc, newPrimary.getDB(dbName).getCollection(collName).findOne()); + + jsTestLog("Running another transaction on the new primary"); + const secondSession = newPrimary.startSession({causalConsistency: false}); + secondSession.startTransaction({writeConcern: {w: "majority"}}); + assert.commandWorked( + secondSession.getDatabase(dbName).getCollection(collName).insert({_id: "second-doc"})); + secondSession.commitTransaction(); + } + + function stepDownViaHeartbeat() { + jsTestLog("Stepping down primary via heartbeat"); + replTest.stepUp(replTest.getSecondary()); + } + testTransactionsWithFailover(stepDownViaHeartbeat); + + function stepDownViaCommand() { + jsTestLog("Stepping down primary via command"); + assert.throws(function() { + replTest.getPrimary().adminCommand({replSetStepDown: 10}); + }); + } + testTransactionsWithFailover(stepDownViaCommand); + + replTest.stopSet(); +})(); diff --git a/src/mongo/db/kill_sessions_local.cpp b/src/mongo/db/kill_sessions_local.cpp index 8f86d2e7339..ed16522171d 100644 --- a/src/mongo/db/kill_sessions_local.cpp +++ b/src/mongo/db/kill_sessions_local.cpp @@ -178,4 +178,36 @@ void killSessionsAbortAllPreparedTransactions(OperationContext* opCtx) { }); } +void yieldLocksForPreparedTransactions(OperationContext* opCtx) { + // Create a new opCtx because we need an empty locker to refresh the locks. + auto newClient = opCtx->getServiceContext()->makeClient("prepared-txns-yield-locks"); + AlternativeClientRegion acr(newClient); + auto newOpCtx = cc().makeOperationContext(); + + // Scan the sessions again to get the list of all sessions with prepared transaction + // to yield their locks. + SessionKiller::Matcher matcherAllSessions( + KillAllSessionsByPatternSet{makeKillAllSessionsByPattern(newOpCtx.get())}); + killSessionsAction( + newOpCtx.get(), + matcherAllSessions, + [](const ObservableSession& session) { + return TransactionParticipant::get(session.get())->transactionIsPrepared(); + }, + [](OperationContext* killerOpCtx, const SessionToKill& session) { + auto const txnParticipant = TransactionParticipant::get(session.get()); + // Yield locks for prepared transactions. + // When scanning and killing operations, all prepared transactions are included in the + // list. Even though new sessions may be created after the scan, none of them can become + // prepared during stepdown, since the RSTL has been enqueued, preventing any new + // writes. + if (txnParticipant->transactionIsPrepared()) { + LOG(3) << "Yielding locks of prepared transaction. SessionId: " + << session.getSessionId().getId() + << " TxnNumber: " << txnParticipant->getActiveTxnNumber(); + txnParticipant->refreshLocksForPreparedTransaction(killerOpCtx, true); + } + }); +} + } // namespace mongo diff --git a/src/mongo/db/kill_sessions_local.h b/src/mongo/db/kill_sessions_local.h index d8eeb39303e..b439f5c5273 100644 --- a/src/mongo/db/kill_sessions_local.h +++ b/src/mongo/db/kill_sessions_local.h @@ -66,4 +66,9 @@ void killSessionsLocalShutdownAllTransactions(OperationContext* opCtx); */ void killSessionsAbortAllPreparedTransactions(OperationContext* opCtx); +/** + * Yields locks of prepared transactions. + */ +void yieldLocksForPreparedTransactions(OperationContext* opCtx); + } // namespace mongo diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 765f3ee6db0..560e2d61338 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -1974,6 +1974,10 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx, } // Stepdown success! + + // Yield locks for prepared transactions. + yieldLocksForPreparedTransactions(opCtx); + onExitGuard.dismiss(); updateMemberState(); diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index a987b27e0d4..be46d90817d 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -474,6 +474,21 @@ private: void startKillOpThread(); /** + * On stepdown, we need to kill all write operations and all transactional operations, + * so that unprepared and prepared transactions can release or yield their locks. + * The required ordering between stepdown steps is: + * 1) Enqueue RSTL in X mode. + * 2) Kill all write operations and operations with S locks + * 3) Abort unprepared transactions. + * 4) Repeat step 2) and 3) until the stepdown thread can acquire RSTL. + * 5) Yield locks of all prepared transactions. + * + * Since prepared transactions don't hold RSTL, step 1) to step 3) make sure all + * running transactions that may hold RSTL finish, get killed or yield their locks, + * so that we can acquire RSTL at step 4). Holding the locks of prepared transactions + * until step 5) guarantees if any conflict operations (e.g. DDL operations) failed + * to be killed for any reason, we will get a deadlock instead of a silent data corruption. + * * Loops continuously to kill all user operations that have global lock except in IS mode. * And, aborts all stashed (inactive) transactions. * Terminates once killSignaled is set true. diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp index 39bd6cceffa..ece31e5739d 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp @@ -40,6 +40,7 @@ #include "mongo/base/status.h" #include "mongo/db/concurrency/replication_state_transition_lock_guard.h" +#include "mongo/db/kill_sessions_local.h" #include "mongo/db/logical_clock.h" #include "mongo/db/logical_time_validator.h" #include "mongo/db/operation_context.h" @@ -396,6 +397,9 @@ void ReplicationCoordinatorImpl::_stepDownFinish( rstlLock.waitForLockUntil(Date_t::max()); } + // Yield locks for prepared transactions. + yieldLocksForPreparedTransactions(opCtx.get()); + stdx::unique_lock<stdx::mutex> lk(_mutex); _topCoord->finishUnconditionalStepDown(); diff --git a/src/mongo/db/session_catalog_mongod.cpp b/src/mongo/db/session_catalog_mongod.cpp index dd6072af24e..5aacb745275 100644 --- a/src/mongo/db/session_catalog_mongod.cpp +++ b/src/mongo/db/session_catalog_mongod.cpp @@ -28,7 +28,7 @@ * it in the license file. */ -#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kWrite +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kTransaction #include "mongo/platform/basic.h" @@ -43,6 +43,7 @@ #include "mongo/db/transaction_participant.h" #include "mongo/rpc/get_status_from_command_result.h" #include "mongo/util/concurrency/thread_pool.h" +#include "mongo/util/log.h" namespace mongo { namespace { @@ -138,6 +139,8 @@ void MongoDSessionCatalog::onStepUp(OperationContext* opCtx) { MongoDOperationContextSession ocs(newOpCtx.get()); auto txnParticipant = TransactionParticipant::get(OperationContextSession::get(newOpCtx.get())); + LOG(3) << "Restoring locks of prepared transaction. SessionId: " << sessionId.getId() + << " TxnNumber: " << txnParticipant->getActiveTxnNumber(); txnParticipant->refreshLocksForPreparedTransaction(newOpCtx.get(), false); } } diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index a2aacecac3b..2dfe999cc1c 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -1097,7 +1097,12 @@ void TransactionParticipant::commitPreparedTransaction(OperationContext* opCtx, auto opObserver = opCtx->getServiceContext()->getOpObserver(); invariant(opObserver); - opObserver->onTransactionCommit(opCtx, commitOplogSlot, commitTimestamp); + + { + // Once the transaction is committed, the oplog entry must be written. + UninterruptibleLockGuard lockGuard(opCtx->lockState()); + opObserver->onTransactionCommit(opCtx, commitOplogSlot, commitTimestamp); + } lk.lock(); _checkIsActiveTransaction(lk, *opCtx->getTxnNumber(), true); |