summary | refs | log | tree | commit | diff
path: root/src/mongo/db/repl
diff options
context:
space:
mode:
author Louis Williams <louis.williams@mongodb.com> 2018-03-28 13:39:45 -0400
committer Louis Williams <louis.williams@mongodb.com> 2018-04-30 11:50:37 -0400
commit 07d7a7095a7ebb116b0d02a4ac396620710e9e77 (patch)
tree f17d474d953d49cc1d5d130a8a6c38fd975d10a2 /src/mongo/db/repl
parent 3d43d9420c12c2f47d614fc6f2546cf80742817e (diff)
download mongo-07d7a7095a7ebb116b0d02a4ac396620710e9e77.tar.gz
SERVER-33674 Require GlobalLocks with deadlines to specify interrupt behavior when interrupted
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r-- src/mongo/db/repl/do_txn.cpp | 2
-rw-r--r-- src/mongo/db/repl/noop_writer.cpp | 6
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp | 9
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp | 24
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp | 7
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_test.cpp | 4
-rw-r--r-- src/mongo/db/repl/rollback_impl.cpp | 2
7 files changed, 29 insertions, 25 deletions
diff --git a/src/mongo/db/repl/do_txn.cpp b/src/mongo/db/repl/do_txn.cpp
index cfb968c71e6..52a1f36fb31 100644
--- a/src/mongo/db/repl/do_txn.cpp
+++ b/src/mongo/db/repl/do_txn.cpp
@@ -289,7 +289,7 @@ Status doTxn(OperationContext* opCtx,
// Acquire global lock in IX mode so that the replication state check will remain valid.
- Lock::GlobalLock globalLock(opCtx, MODE_IX, Date_t::max());
+ Lock::GlobalLock globalLock(opCtx, MODE_IX);
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
bool userInitiatedWritesAndNotPrimary =
diff --git a/src/mongo/db/repl/noop_writer.cpp b/src/mongo/db/repl/noop_writer.cpp
index 12af44fb559..27a24c011ec 100644
--- a/src/mongo/db/repl/noop_writer.cpp
+++ b/src/mongo/db/repl/noop_writer.cpp
@@ -141,12 +141,10 @@ void NoopWriter::stopWritingPeriodicNoops() {
}
void NoopWriter::_writeNoop(OperationContext* opCtx) {
- // Ensure that we don't trigger an exception when attempting to take locks.
- UninterruptibleLockGuard noInterrupt(opCtx->lockState());
-
// Use GlobalLock + lockMMAPV1Flush instead of DBLock to allow return when the lock is not
// available. It may happen when the primary steps down and a shared global lock is acquired.
- Lock::GlobalLock lock(opCtx, MODE_IX, Date_t::now() + Milliseconds(1));
+ Lock::GlobalLock lock(
+ opCtx, MODE_IX, Date_t::now() + Milliseconds(1), Lock::InterruptBehavior::kLeaveUnlocked);
if (!lock.isLocked()) {
LOG(1) << "Global lock is not available skipping noopWrite";
return;
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 1ecb7e6fee2..4f3e7d58526 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1606,8 +1606,11 @@ Status ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
return {ErrorCodes::NotMaster, "not primary so can't step down"};
}
- auto globalLock = stdx::make_unique<Lock::GlobalLock>(
- opCtx, MODE_X, stepDownUntil, Lock::GlobalLock::EnqueueOnly());
+ auto globalLock = stdx::make_unique<Lock::GlobalLock>(opCtx,
+ MODE_X,
+ stepDownUntil,
+ Lock::InterruptBehavior::kThrow,
+ Lock::GlobalLock::EnqueueOnly());
// We've requested the global exclusive lock which will stop new operations from coming in,
// but existing operations could take a long time to finish, so kill all user operations
@@ -1718,7 +1721,7 @@ Status ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
// to acquire it now. For the same reason, we also disable lock acquisition
// interruption, to guarantee that we get the lock eventually.
UninterruptibleLockGuard noInterrupt(opCtx->lockState());
- globalLock.reset(new Lock::GlobalLock(opCtx, MODE_X, Date_t::max()));
+ globalLock.reset(new Lock::GlobalLock(opCtx, MODE_X));
invariant(globalLock->isLocked());
lk.lock();
});
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
index fd48c7c71cf..df475771ed0 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
@@ -2271,7 +2271,7 @@ TEST_F(PrimaryCatchUpTest, PrimaryDoesNotNeedToCatchUp) {
ASSERT_EQ(1, countLogLinesContaining("Caught up to the latest optime known via heartbeats"));
auto opCtx = makeOperationContext();
signalDrainComplete(opCtx.get());
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
}
@@ -2294,7 +2294,7 @@ TEST_F(PrimaryCatchUpTest, CatchupSucceeds) {
ASSERT_EQUALS(1, countLogLinesContaining("Caught up to the latest known optime successfully"));
auto opCtx = makeOperationContext();
signalDrainComplete(opCtx.get());
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
}
@@ -2314,7 +2314,7 @@ TEST_F(PrimaryCatchUpTest, CatchupTimeout) {
ASSERT_EQUALS(1, countLogLinesContaining("Catchup timed out"));
auto opCtx = makeOperationContext();
signalDrainComplete(opCtx.get());
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
}
@@ -2339,7 +2339,7 @@ TEST_F(PrimaryCatchUpTest, CannotSeeAllNodes) {
ASSERT_EQ(1, countLogLinesContaining("Caught up to the latest optime known via heartbeats"));
auto opCtx = makeOperationContext();
signalDrainComplete(opCtx.get());
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
}
@@ -2364,7 +2364,7 @@ TEST_F(PrimaryCatchUpTest, HeartbeatTimeout) {
ASSERT_EQ(1, countLogLinesContaining("Caught up to the latest optime known via heartbeats"));
auto opCtx = makeOperationContext();
signalDrainComplete(opCtx.get());
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
}
@@ -2387,7 +2387,7 @@ TEST_F(PrimaryCatchUpTest, PrimaryStepsDownBeforeHeartbeatRefreshing) {
ASSERT_EQUALS(0, countLogLinesContaining("Caught up to the latest"));
ASSERT_EQUALS(0, countLogLinesContaining("Catchup timed out"));
auto opCtx = makeOperationContext();
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_FALSE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
}
@@ -2416,7 +2416,7 @@ TEST_F(PrimaryCatchUpTest, PrimaryStepsDownDuringCatchUp) {
ASSERT_EQUALS(0, countLogLinesContaining("Caught up to the latest"));
ASSERT_EQUALS(0, countLogLinesContaining("Catchup timed out"));
auto opCtx = makeOperationContext();
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_FALSE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
}
@@ -2459,11 +2459,11 @@ TEST_F(PrimaryCatchUpTest, PrimaryStepsDownDuringDrainMode) {
ASSERT(replCoord->getApplierState() == ApplierState::Draining);
auto opCtx = makeOperationContext();
{
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_FALSE(replCoord->canAcceptWritesForDatabase(opCtx.get(), "test"));
}
signalDrainComplete(opCtx.get());
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT(replCoord->getApplierState() == ApplierState::Stopped);
ASSERT_TRUE(replCoord->canAcceptWritesForDatabase(opCtx.get(), "test"));
}
@@ -2527,7 +2527,7 @@ TEST_F(PrimaryCatchUpTest, FreshestNodeBecomesAvailableLater) {
ASSERT_EQ(1, countLogLinesContaining("Caught up to the latest"));
auto opCtx = makeOperationContext();
signalDrainComplete(opCtx.get());
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
}
@@ -2572,7 +2572,7 @@ TEST_F(PrimaryCatchUpTest, InfiniteTimeoutAndAbort) {
ASSERT_EQUALS(0, countLogLinesContaining("Catchup timed out"));
auto opCtx = makeOperationContext();
signalDrainComplete(opCtx.get());
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
}
@@ -2586,7 +2586,7 @@ TEST_F(PrimaryCatchUpTest, ZeroTimeout) {
ASSERT_EQUALS(1, countLogLinesContaining("Skipping primary catchup"));
auto opCtx = makeOperationContext();
signalDrainComplete(opCtx.get());
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index dd731fdcacb..849fc58cdc1 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -394,8 +394,11 @@ void ReplicationCoordinatorImpl::_stepDownFinish(
}
auto opCtx = cc().makeOperationContext();
- Lock::GlobalLock globalExclusiveLock{
- opCtx.get(), MODE_X, Date_t::max(), Lock::GlobalLock::EnqueueOnly()};
+ Lock::GlobalLock globalExclusiveLock{opCtx.get(),
+ MODE_X,
+ Date_t::max(),
+ Lock::InterruptBehavior::kThrow,
+ Lock::GlobalLock::EnqueueOnly()};
_externalState->killAllUserOperations(opCtx.get());
globalExclusiveLock.waitForLockUntil(Date_t::max());
invariant(globalExclusiveLock.isLocked());
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 82ae756925d..41b572c0ef0 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -2161,7 +2161,7 @@ TEST_F(StepDownTest, InterruptingStepDownCommandRestoresWriteAvailability) {
// This is the important check, that we didn't accidentally step back up when aborting the
// stepdown command attempt.
const auto opCtx = makeOperationContext();
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "admin"));
}
@@ -2217,7 +2217,7 @@ TEST_F(StepDownTest, InterruptingAfterUnconditionalStepdownDoesNotRestoreWriteAv
// This is the important check, that we didn't accidentally step back up when aborting the
// stepdown command attempt.
- Lock::GlobalLock lock(opCtx.get(), MODE_IX, Date_t::max());
+ Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_FALSE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "admin"));
}
diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp
index e92b0c9ab2d..ed65f5a0150 100644
--- a/src/mongo/db/repl/rollback_impl.cpp
+++ b/src/mongo/db/repl/rollback_impl.cpp
@@ -302,7 +302,7 @@ Status RollbackImpl::_awaitBgIndexCompletion(OperationContext* opCtx) {
StorageEngine* storageEngine = opCtx->getServiceContext()->getGlobalStorageEngine();
std::vector<std::string> dbs;
{
- Lock::GlobalLock lk(opCtx, MODE_IS, Date_t::max());
+ Lock::GlobalLock lk(opCtx, MODE_IS);
storageEngine->listDatabases(&dbs);
}