author     Siyuan Zhou <siyuan.zhou@mongodb.com>  2019-01-21 19:11:41 -0500
committer  Siyuan Zhou <siyuan.zhou@mongodb.com>  2019-01-24 22:49:49 -0500
commit     d1933f0b1b88c4c2ad0f2fdd6ab106f7eeacddfa (patch)
tree       f28cbc8eb44fa4363c9824e0e772014c0306f433
parent     ec4520d72b2a4ff6ba980e913c988b11e7d188a4 (diff)
download   mongo-d1933f0b1b88c4c2ad0f2fdd6ab106f7eeacddfa.tar.gz
SERVER-38282 Yield locks for prepared transactions on stepdown.
-rw-r--r--  jstests/core/txns/libs/prepare_helpers.js  2
-rw-r--r--  jstests/replsets/prepared_transaction_on_failover.js  91
-rw-r--r--  src/mongo/db/kill_sessions_local.cpp  32
-rw-r--r--  src/mongo/db/kill_sessions_local.h  5
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp  4
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.h  15
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp  4
-rw-r--r--  src/mongo/db/session_catalog_mongod.cpp  5
-rw-r--r--  src/mongo/db/transaction_participant.cpp  7
9 files changed, 162 insertions(+), 3 deletions(-)
diff --git a/jstests/core/txns/libs/prepare_helpers.js b/jstests/core/txns/libs/prepare_helpers.js
index 56927af8b56..d157fc351ce 100644
--- a/jstests/core/txns/libs/prepare_helpers.js
+++ b/jstests/core/txns/libs/prepare_helpers.js
@@ -40,7 +40,7 @@ const PrepareHelpers = (function() {
// End the transaction on the shell session.
if (res.ok) {
- session.commitTransaction();
+ session.commitTransaction_forTesting();
} else {
session.abortTransaction_forTesting();
}
diff --git a/jstests/replsets/prepared_transaction_on_failover.js b/jstests/replsets/prepared_transaction_on_failover.js
new file mode 100644
index 00000000000..faec21bc52b
--- /dev/null
+++ b/jstests/replsets/prepared_transaction_on_failover.js
@@ -0,0 +1,91 @@
+/**
+ * Tests prepared transactions can survive failover and commit on a new primary.
+ *
+ * @tags: [uses_transactions, uses_prepare_transaction]
+ */
+(function() {
+ "use strict";
+ load("jstests/core/txns/libs/prepare_helpers.js");
+ load("jstests/replsets/rslib.js"); // For reconnect()
+
+ const replTest = new ReplSetTest({nodes: 2});
+ replTest.startSet();
+ replTest.initiate();
+
+ const dbName = jsTest.name();
+ const collName = "coll";
+ const otherDbName = dbName + "_other";
+
+ function testTransactionsWithFailover(stepDownFunction) {
+ const primary = replTest.getPrimary();
+ const newPrimary = replTest.getSecondary();
+ const testDB = primary.getDB(dbName);
+
+ testDB.dropDatabase();
+ testDB.getSiblingDB(otherDbName).dropDatabase();
+ assert.commandWorked(testDB.runCommand({create: collName, writeConcern: {w: "majority"}}));
+
+ jsTestLog("Starting transaction");
+ const session = primary.startSession({causalConsistency: false});
+ const sessionDB = session.getDatabase(dbName);
+ session.startTransaction({writeConcern: {w: "majority"}});
+
+ const doc = {_id: "txn on primary " + primary};
+ assert.commandWorked(sessionDB.getCollection(collName).insert(doc));
+
+ jsTestLog("Putting transaction into prepare");
+ const prepareTimestamp = PrepareHelpers.prepareTransaction(session);
+ replTest.awaitReplication();
+
+ stepDownFunction();
+ reconnect(primary);
+
+ jsTestLog("Waiting for the other node to run for election and become primary");
+ assert.eq(replTest.getPrimary(), newPrimary);
+
+ jsTestLog("Creating an unrelated collection");
+ // Applying the unrelated DDL command on the old primary, now a secondary, requires a
+ // strong lock, so awaiting replication verifies that the prepared transaction has
+ // yielded its locks on that node.
+ assert.commandWorked(newPrimary.getDB(otherDbName).runCommand({create: collName}));
+ replTest.awaitReplication();
+
+ jsTestLog("Dropping the collection in use cannot acquire the lock");
+ assert.commandFailedWithCode(
+ newPrimary.getDB(testDB).runCommand({drop: collName, maxTimeMS: 1000}),
+ ErrorCodes.MaxTimeMSExpired);
+
+ jsTestLog("Committing transaction on the new primary");
+ // Create a proxy session to reuse the session state of the old primary.
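+ // The delegating session routes commands over the connection to newPrimary while reusing
+ // the logical session id and transaction state established on the old primary's session.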
+ const newSession = new _DelegatingDriverSession(newPrimary, session);
+
+ assert.commandWorked(
+ PrepareHelpers.commitTransactionAfterPrepareTS(newSession, prepareTimestamp));
+ replTest.awaitReplication();
+
+ assert.docEq(doc, testDB.getCollection(collName).findOne());
+ assert.docEq(doc, newPrimary.getDB(dbName).getCollection(collName).findOne());
+
+ jsTestLog("Running another transaction on the new primary");
+ const secondSession = newPrimary.startSession({causalConsistency: false});
+ secondSession.startTransaction({writeConcern: {w: "majority"}});
+ assert.commandWorked(
+ secondSession.getDatabase(dbName).getCollection(collName).insert({_id: "second-doc"}));
+ secondSession.commitTransaction();
+ }
+
+ function stepDownViaHeartbeat() {
+ jsTestLog("Stepping down primary via heartbeat");
+ replTest.stepUp(replTest.getSecondary());
+ }
+ testTransactionsWithFailover(stepDownViaHeartbeat);
+
+ function stepDownViaCommand() {
+ jsTestLog("Stepping down primary via command");
+ assert.throws(function() {
+ replTest.getPrimary().adminCommand({replSetStepDown: 10});
+ });
+ }
+ testTransactionsWithFailover(stepDownViaCommand);
+
+ replTest.stopSet();
+})();
diff --git a/src/mongo/db/kill_sessions_local.cpp b/src/mongo/db/kill_sessions_local.cpp
index 8f86d2e7339..ed16522171d 100644
--- a/src/mongo/db/kill_sessions_local.cpp
+++ b/src/mongo/db/kill_sessions_local.cpp
@@ -178,4 +178,36 @@ void killSessionsAbortAllPreparedTransactions(OperationContext* opCtx) {
});
}
+void yieldLocksForPreparedTransactions(OperationContext* opCtx) {
+ // Create a new opCtx because we need an empty locker to refresh the locks.
+ auto newClient = opCtx->getServiceContext()->makeClient("prepared-txns-yield-locks");
+ AlternativeClientRegion acr(newClient);
+ auto newOpCtx = cc().makeOperationContext();
+
+ // Scan the sessions again to get the list of all sessions with a prepared transaction
+ // and yield their locks.
+ SessionKiller::Matcher matcherAllSessions(
+ KillAllSessionsByPatternSet{makeKillAllSessionsByPattern(newOpCtx.get())});
+ killSessionsAction(
+ newOpCtx.get(),
+ matcherAllSessions,
+ [](const ObservableSession& session) {
+ return TransactionParticipant::get(session.get())->transactionIsPrepared();
+ },
+ [](OperationContext* killerOpCtx, const SessionToKill& session) {
+ auto const txnParticipant = TransactionParticipant::get(session.get());
+ // Yield locks for prepared transactions.
+ // When scanning and killing operations, all prepared transactions are included in the
+ // list. Even though new sessions may be created after the scan, none of them can become
+ // prepared during stepdown, since the RSTL has been enqueued, preventing any new
+ // writes.
+ if (txnParticipant->transactionIsPrepared()) {
+ LOG(3) << "Yielding locks of prepared transaction. SessionId: "
+ << session.getSessionId().getId()
+ << " TxnNumber: " << txnParticipant->getActiveTxnNumber();
+ txnParticipant->refreshLocksForPreparedTransaction(killerOpCtx, true);
+ }
+ });
+}
+
} // namespace mongo
diff --git a/src/mongo/db/kill_sessions_local.h b/src/mongo/db/kill_sessions_local.h
index d8eeb39303e..b439f5c5273 100644
--- a/src/mongo/db/kill_sessions_local.h
+++ b/src/mongo/db/kill_sessions_local.h
@@ -66,4 +66,9 @@ void killSessionsLocalShutdownAllTransactions(OperationContext* opCtx);
*/
void killSessionsAbortAllPreparedTransactions(OperationContext* opCtx);
+/**
+ * Yields locks of prepared transactions.
+ */
+void yieldLocksForPreparedTransactions(OperationContext* opCtx);
+
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 765f3ee6db0..560e2d61338 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1974,6 +1974,10 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
}
// Stepdown success!
+
+ // Yield locks for prepared transactions.
+ yieldLocksForPreparedTransactions(opCtx);
+
onExitGuard.dismiss();
updateMemberState();
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index a987b27e0d4..be46d90817d 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -474,6 +474,21 @@ private:
void startKillOpThread();
/**
+ * On stepdown, we need to kill all write operations and all transactional operations,
+ * so that unprepared and prepared transactions can release or yield their locks.
+ * The required ordering between stepdown steps is:
+ * 1) Enqueue the RSTL in X mode.
+ * 2) Kill all write operations and operations holding S locks.
+ * 3) Abort unprepared transactions.
+ * 4) Repeat steps 2) and 3) until the stepdown thread can acquire the RSTL.
+ * 5) Yield the locks of all prepared transactions.
+ *
+ * Since prepared transactions do not hold the RSTL, steps 1) through 3) ensure that all
+ * running transactions that may hold the RSTL finish, get killed, or yield their locks,
+ * so that the RSTL can be acquired at step 4). Holding the locks of prepared transactions
+ * until step 5) guarantees that if any conflicting operations (e.g. DDL operations) fail
+ * to be killed for any reason, we get a deadlock instead of silent data corruption.
+ *
* Loops continuously to kill all user operations that have global lock except in IS mode.
* And, aborts all stashed (inactive) transactions.
* Terminates once killSignaled is set true.
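To make the ordering described above concrete, the sketch below shows how the five steps compose during stepdown. It is a hypothetical outline, not server code: only waitForLockUntil() and yieldLocksForPreparedTransactions() appear in this patch; isLocked() is assumed to exist on the RSTL guard, and killConflictingOperations() / abortUnpreparedTransactions() are placeholders for the kill-op machinery (in the real coordinator that work runs on the thread started by startKillOpThread while the stepdown thread waits on the RSTL).

    // Hypothetical outline of the stepdown ordering; placeholder names are not real server functions.
    void stepDownOrderingSketch(OperationContext* opCtx,
                                ReplicationStateTransitionLockGuard& rstlLock) {
        // 1) The caller has already enqueued the RSTL in X mode via rstlLock.
        while (!rstlLock.isLocked()) {                    // assumed accessor on the guard
            killConflictingOperations(opCtx);             // 2) kill writes and S-lock holders (placeholder)
            abortUnpreparedTransactions(opCtx);           // 3) abort unprepared transactions (placeholder)
            rstlLock.waitForLockUntil(Date_t::now() + Seconds(1));  // 4) retry until the RSTL is held
        }
        // 5) Only once the RSTL is held do prepared transactions yield their locks, so a
        //    conflicting operation that escaped the kill deadlocks instead of corrupting data.
        yieldLocksForPreparedTransactions(opCtx);
    }
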
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 39bd6cceffa..ece31e5739d 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -40,6 +40,7 @@
#include "mongo/base/status.h"
#include "mongo/db/concurrency/replication_state_transition_lock_guard.h"
+#include "mongo/db/kill_sessions_local.h"
#include "mongo/db/logical_clock.h"
#include "mongo/db/logical_time_validator.h"
#include "mongo/db/operation_context.h"
@@ -396,6 +397,9 @@ void ReplicationCoordinatorImpl::_stepDownFinish(
rstlLock.waitForLockUntil(Date_t::max());
}
+ // Yield locks for prepared transactions.
+ yieldLocksForPreparedTransactions(opCtx.get());
+
stdx::unique_lock<stdx::mutex> lk(_mutex);
_topCoord->finishUnconditionalStepDown();
diff --git a/src/mongo/db/session_catalog_mongod.cpp b/src/mongo/db/session_catalog_mongod.cpp
index dd6072af24e..5aacb745275 100644
--- a/src/mongo/db/session_catalog_mongod.cpp
+++ b/src/mongo/db/session_catalog_mongod.cpp
@@ -28,7 +28,7 @@
* it in the license file.
*/
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kWrite
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kTransaction
#include "mongo/platform/basic.h"
@@ -43,6 +43,7 @@
#include "mongo/db/transaction_participant.h"
#include "mongo/rpc/get_status_from_command_result.h"
#include "mongo/util/concurrency/thread_pool.h"
+#include "mongo/util/log.h"
namespace mongo {
namespace {
@@ -138,6 +139,8 @@ void MongoDSessionCatalog::onStepUp(OperationContext* opCtx) {
MongoDOperationContextSession ocs(newOpCtx.get());
auto txnParticipant =
TransactionParticipant::get(OperationContextSession::get(newOpCtx.get()));
+ LOG(3) << "Restoring locks of prepared transaction. SessionId: " << sessionId.getId()
+ << " TxnNumber: " << txnParticipant->getActiveTxnNumber();
txnParticipant->refreshLocksForPreparedTransaction(newOpCtx.get(), false);
}
}
diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp
index a2aacecac3b..2dfe999cc1c 100644
--- a/src/mongo/db/transaction_participant.cpp
+++ b/src/mongo/db/transaction_participant.cpp
@@ -1097,7 +1097,12 @@ void TransactionParticipant::commitPreparedTransaction(OperationContext* opCtx,
auto opObserver = opCtx->getServiceContext()->getOpObserver();
invariant(opObserver);
- opObserver->onTransactionCommit(opCtx, commitOplogSlot, commitTimestamp);
+
+ {
+ // Once the transaction is committed, the commit oplog entry must be written. The
+ // UninterruptibleLockGuard ensures that lock acquisitions for the oplog write cannot
+ // be interrupted, so the entry cannot be skipped once we have decided to commit.
+ UninterruptibleLockGuard lockGuard(opCtx->lockState());
+ opObserver->onTransactionCommit(opCtx, commitOplogSlot, commitTimestamp);
+ }
lk.lock();
_checkIsActiveTransaction(lk, *opCtx->getTxnNumber(), true);