author     Justin Seyster <justin.seyster@mongodb.com>  2018-03-30 15:25:47 -0400
committer  Justin Seyster <justin.seyster@mongodb.com>  2018-04-18 16:41:30 -0400
commit     d9a5a306690d7cdb8831e64441a66cdd503d8064 (patch)
tree       9d28b12e88351a4efab5b9d883f9a79ac2b2044a
parent     cf34f10d361240f4a94f5fae94cb55bc2c0acf9b (diff)
download   mongo-d9a5a306690d7cdb8831e64441a66cdd503d8064.tar.gz
SERVER-27534 All writing operations must fail if the term changes.
This reapplies bc19d43f, which was reverted by ae50776b. It also adds more test fixes.
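The mechanism behind the commit title is a check added to LockerImpl::lockBegin (see the lock_state.cpp hunk below): when a lock grant succeeds for an operation whose OperationContext has already been interrupted, for example because it was killed at stepdown after its term changed, the grant is released and the interruption is rethrown, unless the caller opted out with an UninterruptibleLockGuard. A minimal, non-compiling sketch of the two behaviors as callers see them (headers are omitted and the helper function names are illustrative, not part of this patch):

    // A normal writing path: after this change, acquiring any lock on a killed
    // OperationContext throws ErrorCodes::Interrupted even if the lock is free,
    // so a write cannot keep applying batches after its term has changed.
    void writeOneBatch(OperationContext* opCtx) {   // illustrative helper
        Lock::DBLock dbLock(opCtx, "db", MODE_IX);  // may throw Interrupted
        // ... insert one batch ...
    }

    // Cleanup paths that must run even for an interrupted operation (shutdown,
    // dropping a temporary collection, profiling, noop writes) opt out explicitly.
    void cleanupStep(OperationContext* opCtx) {     // illustrative helper
        UninterruptibleLockGuard noInterrupt(opCtx->lockState());
        Lock::GlobalLock lk(opCtx, MODE_IX, Date_t::max());  // does not throw
        // ... perform the cleanup ...
    }

The per-file changes below either thread the OperationContext through the lock acquisition path or add UninterruptibleLockGuard to the cleanup paths that must not be disturbed by the new behavior.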
-rw-r--r--  buildscripts/resmokeconfig/suites/replica_sets_auth.yml |   2
-rw-r--r--  jstests/replsets/interrupted_batch_insert.js            | 126
-rw-r--r--  src/mongo/client/embedded/embedded.cpp                  |   1
-rw-r--r--  src/mongo/db/catalog/rename_collection.cpp              |   3
-rw-r--r--  src/mongo/db/commands/getmore_cmd.cpp                   |   3
-rw-r--r--  src/mongo/db/concurrency/d_concurrency_test.cpp         |  74
-rw-r--r--  src/mongo/db/concurrency/deadlock_detection_test.cpp    |  44
-rw-r--r--  src/mongo/db/concurrency/lock_state.cpp                 |  18
-rw-r--r--  src/mongo/db/concurrency/lock_state.h                   |   6
-rw-r--r--  src/mongo/db/concurrency/lock_state_test.cpp            |   8
-rw-r--r--  src/mongo/db/concurrency/lock_stats_test.cpp            |   2
-rw-r--r--  src/mongo/db/introspect.cpp                             |   5
-rw-r--r--  src/mongo/db/ops/write_ops_exec.cpp                     |   8
-rw-r--r--  src/mongo/db/repair_database.cpp                        |   3
-rw-r--r--  src/mongo/db/repl/noop_writer.cpp                       |   3
-rw-r--r--  src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp |   3
-rw-r--r--  src/mongo/db/service_entry_point_common.cpp             |   3
17 files changed, 277 insertions, 35 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_auth.yml b/buildscripts/resmokeconfig/suites/replica_sets_auth.yml
index 864230bbce7..9bb2e5b1602 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_auth.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_auth.yml
@@ -11,6 +11,8 @@ selector:
exclude_files:
# Skip any tests that run with auth explicitly.
- jstests/replsets/*[aA]uth*.js
+ # Also skip tests that require a ScopedThread, because ScopedThreads don't inherit credentials.
+ - jstests/replsets/interrupted_batch_insert.js
executor:
config:
diff --git a/jstests/replsets/interrupted_batch_insert.js b/jstests/replsets/interrupted_batch_insert.js
new file mode 100644
index 00000000000..b55214af05f
--- /dev/null
+++ b/jstests/replsets/interrupted_batch_insert.js
@@ -0,0 +1,126 @@
+// Tests the scenario described in SERVER-27534.
+// 1. Send a single insert command with a large number of documents and the {ordered: true} option.
+// 2. Force the thread processing the insert command to hang in between insert batches. (Inserts are
+// typically split into batches of 64, and the server yields locks between batches.)
+// 3. Disconnect the original primary from the network, forcing another node to step up.
+// 4. Insert a single document on the new primary.
+// 5. Return the original primary to the network and force it to step up by disconnecting the
+// primary that replaced it. The original primary has to roll back any batches from step 1
+// that were inserted locally but did not get majority committed before the insert in step 4.
+// 6. Unpause the thread performing the insert from step 1. If it continues to
+// insert batches even though there was a rollback, those inserts will
+// violate the {ordered: true} option.
+
+load('jstests/libs/parallelTester.js');
+load("jstests/replsets/rslib.js");
+
+(function() {
+ "use strict";
+
+ var name = "interrupted_batch_insert";
+ var replTest = new ReplSetTest({name: name, nodes: 3, useBridge: true});
+ var nodes = replTest.nodeList();
+
+ var conns = replTest.startSet();
+ replTest.initiate({
+ _id: name,
+ members: [
+ {_id: 0, host: nodes[0]},
+ {_id: 1, host: nodes[1]},
+ {_id: 2, host: nodes[2], priority: 0}
+ ]
+ });
+
+ // The test starts with node 0 as the primary.
+ replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY);
+ var primary = replTest.nodes[0];
+ var collName = primary.getDB("db")[name].getFullName();
+
+ var getParameterResult =
+ primary.getDB("admin").runCommand({getParameter: 1, internalInsertMaxBatchSize: 1});
+ assert.commandWorked(getParameterResult);
+ const batchSize = getParameterResult.internalInsertMaxBatchSize;
+
+ // Prevent node 1 from getting any data from the node 0 oplog.
+ conns[0].disconnect(conns[1]);
+
+ // Allow the primary to insert the first 5 batches of documents. After that, the fail point
+ // activates, and the client thread hangs until the fail point gets turned off.
+ assert.commandWorked(primary.getDB("db").adminCommand(
+ {configureFailPoint: "hangDuringBatchInsert", mode: {skip: 5}}));
+
+ // In a background thread, issue an insert command to the primary that will insert 10 batches of
+ // documents.
+ var worker = new ScopedThread((host, collName, numToInsert) => {
+ // Insert elements [{idx: 0}, {idx: 1}, ..., {idx: numToInsert - 1}].
+ const docsToInsert = Array.from({length: numToInsert}, (_, i) => {
+ return {idx: i};
+ });
+ var coll = new Mongo(host).getCollection(collName);
+ assert.throws(
+ () => coll.insert(docsToInsert,
+ {writeConcern: {w: "majority", wtimeout: 5000}, ordered: true}),
+ [],
+ "network error");
+ }, primary.host, collName, 10 * batchSize);
+ worker.start();
+
+ // Wait long enough to guarantee that all 5 batches of inserts have executed and the primary is
+ // hung on the "hangDuringBatchInsert" fail point.
+ sleep(1000);
+
+ // Make sure the insert command is, in fact, running in the background.
+ assert.eq(primary.getDB("db").currentOp({"command.insert": name, active: true}).inprog.length,
+ 1);
+
+ // Completely isolate the current primary (node 0), forcing it to step down.
+ conns[0].disconnect(conns[2]);
+
+ // Wait for node 1, the only other eligible node, to become the new primary.
+ replTest.waitForState(replTest.nodes[1], ReplSetTest.State.PRIMARY);
+
+ // Wait for node 2 to acknowledge node 1 as the new primary.
+ replTest.awaitSyncSource(replTest.nodes[2], replTest.nodes[1]);
+
+ // Issue a write to the new primary.
+ var collOnNewPrimary = replTest.nodes[1].getCollection(collName);
+ assert.writeOK(collOnNewPrimary.insert({singleDoc: 1}, {writeConcern: {w: "majority"}}));
+
+ // Isolate node 1, forcing it to step down as primary, and reconnect node 0, allowing it to step
+ // up again.
+ conns[0].reconnect(conns[2]);
+ conns[1].disconnect(conns[2]);
+
+ // Wait for node 0 to become primary again.
+ replTest.waitForState(primary, ReplSetTest.State.PRIMARY);
+
+ // Wait until node 2 recognizes node 0 as primary.
+ replTest.awaitSyncSource(replTest.nodes[2], primary);
+
+ // Allow the batch insert to continue.
+ assert.commandWorked(primary.getDB("db").adminCommand(
+ {configureFailPoint: "hangDuringBatchInsert", mode: "off"}));
+
+ // Wait until the insert command is done.
+ assert.soon(
+ () =>
+ primary.getDB("db").currentOp({"command.insert": name, active: true}).inprog.length ===
+ 0);
+
+ worker.join();
+
+ var docs = primary.getDB("db")[name].find({idx: {$exists: 1}}).sort({idx: 1}).toArray();
+
+    // Any discontinuity in the "idx" values is an error. If an "idx" document failed to insert, all
+    // of the "idx" documents after it should also have failed to insert, because the insert
+    // specified {ordered: true}. Note that if none of the inserts succeeded, that is also fine.
+ docs.forEach((element, index) => {
+ assert.eq(element.idx, index);
+ });
+
+ // Reconnect the remaining disconnected nodes, so we can exit.
+ conns[0].reconnect(conns[1]);
+ conns[1].reconnect(conns[2]);
+
+ replTest.stopSet(15);
+}());
diff --git a/src/mongo/client/embedded/embedded.cpp b/src/mongo/client/embedded/embedded.cpp
index 9f99b33d253..6ad81043143 100644
--- a/src/mongo/client/embedded/embedded.cpp
+++ b/src/mongo/client/embedded/embedded.cpp
@@ -145,6 +145,7 @@ void shutdown(ServiceContext* srvContext) {
// Close all open databases, shutdown storage engine and run all deinitializers.
auto shutdownOpCtx = serviceContext->makeOperationContext(client);
{
+ UninterruptibleLockGuard noInterrupt(shutdownOpCtx->lockState());
Lock::GlobalLock lk(shutdownOpCtx.get(), MODE_X, Date_t::max());
dbHolder().closeAll(shutdownOpCtx.get(), "shutdown");
diff --git a/src/mongo/db/catalog/rename_collection.cpp b/src/mongo/db/catalog/rename_collection.cpp
index 8aa634a14c5..4d6d08753a5 100644
--- a/src/mongo/db/catalog/rename_collection.cpp
+++ b/src/mongo/db/catalog/rename_collection.cpp
@@ -332,6 +332,9 @@ Status renameCollectionCommon(OperationContext* opCtx,
// Dismissed on success
auto tmpCollectionDropper = MakeGuard([&] {
+ // Ensure that we don't trigger an exception when attempting to take locks.
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
BSONObjBuilder unusedResult;
auto status =
dropCollection(opCtx,
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index 83188a92793..9f2904a971d 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -290,6 +290,9 @@ public:
// Only used by the failpoints.
const auto dropAndReaquireReadLock = [&readLock, opCtx, &request]() {
+ // Make sure an interrupted operation does not prevent us from reacquiring the lock.
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
readLock.reset();
readLock.emplace(opCtx, request.nss);
};
diff --git a/src/mongo/db/concurrency/d_concurrency_test.cpp b/src/mongo/db/concurrency/d_concurrency_test.cpp
index 5c04128a022..5cddeda4c74 100644
--- a/src/mongo/db/concurrency/d_concurrency_test.cpp
+++ b/src/mongo/db/concurrency/d_concurrency_test.cpp
@@ -1176,6 +1176,80 @@ TEST_F(DConcurrencyTestFixture, TicketReacquireCanBeInterrupted) {
ASSERT_THROWS_CODE(result.get(), AssertionException, ErrorCodes::Interrupted);
}
+TEST_F(DConcurrencyTestFixture, GlobalLockInInterruptedContextThrowsEvenWhenUncontested) {
+ auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
+ auto opCtx = clients[0].second.get();
+
+ opCtx->markKilled();
+
+ boost::optional<Lock::GlobalRead> globalReadLock;
+ ASSERT_THROWS_CODE(
+ globalReadLock.emplace(opCtx, Date_t::now()), AssertionException, ErrorCodes::Interrupted);
+}
+
+TEST_F(DConcurrencyTestFixture, GlobalLockInInterruptedContextThrowsEvenAcquiringRecursively) {
+ auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
+ auto opCtx = clients[0].second.get();
+
+ Lock::GlobalWrite globalWriteLock(opCtx, Date_t::now());
+
+ opCtx->markKilled();
+
+ {
+ boost::optional<Lock::GlobalWrite> recursiveGlobalWriteLock;
+ ASSERT_THROWS_CODE(recursiveGlobalWriteLock.emplace(opCtx, Date_t::now()),
+ AssertionException,
+ ErrorCodes::Interrupted);
+ }
+}
+
+TEST_F(DConcurrencyTestFixture, GlobalLockInInterruptedContextRespectsUninterruptibleGuard) {
+ auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
+ auto opCtx = clients[0].second.get();
+
+ opCtx->markKilled();
+
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+ Lock::GlobalRead globalReadLock(opCtx, Date_t::now()); // Does not throw.
+}
+
+TEST_F(DConcurrencyTestFixture, DBLockInInterruptedContextThrowsEvenWhenUncontested) {
+ auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
+ auto opCtx = clients[0].second.get();
+
+ opCtx->markKilled();
+
+ boost::optional<Lock::DBLock> dbWriteLock;
+ ASSERT_THROWS_CODE(
+ dbWriteLock.emplace(opCtx, "db", MODE_IX), AssertionException, ErrorCodes::Interrupted);
+}
+
+TEST_F(DConcurrencyTestFixture, DBLockInInterruptedContextThrowsEvenWhenAcquiringRecursively) {
+ auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
+ auto opCtx = clients[0].second.get();
+
+ Lock::DBLock dbWriteLock(opCtx, "db", MODE_X);
+
+ opCtx->markKilled();
+
+ {
+ boost::optional<Lock::DBLock> recursiveDBWriteLock;
+ ASSERT_THROWS_CODE(recursiveDBWriteLock.emplace(opCtx, "db", MODE_X),
+ AssertionException,
+ ErrorCodes::Interrupted);
+ }
+}
+
+TEST_F(DConcurrencyTestFixture, DBLockInInterruptedContextRespectsUninterruptibleGuard) {
+ auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
+ auto opCtx = clients[0].second.get();
+
+ opCtx->markKilled();
+
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+ Lock::DBLock dbWriteLock(opCtx, "db", MODE_X); // Does not throw.
+}
+
TEST_F(DConcurrencyTestFixture, DBLockTimeout) {
auto clientOpctxPairs = makeKClientsWithLockers<DefaultLockerImpl>(2);
auto opctx1 = clientOpctxPairs[0].second.get();
diff --git a/src/mongo/db/concurrency/deadlock_detection_test.cpp b/src/mongo/db/concurrency/deadlock_detection_test.cpp
index ce29fd37b01..ead27261e1a 100644
--- a/src/mongo/db/concurrency/deadlock_detection_test.cpp
+++ b/src/mongo/db/concurrency/deadlock_detection_test.cpp
@@ -37,8 +37,8 @@ TEST(Deadlock, NoDeadlock) {
LockerForTests locker1(MODE_IS);
LockerForTests locker2(MODE_IS);
- ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(resId, MODE_S));
- ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(resId, MODE_S));
+ ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(nullptr, resId, MODE_S));
+ ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(nullptr, resId, MODE_S));
DeadlockDetector wfg1(*getGlobalLockManager(), &locker1);
ASSERT(!wfg1.check().hasCycle());
@@ -54,14 +54,14 @@ TEST(Deadlock, Simple) {
LockerForTests locker1(MODE_IX);
LockerForTests locker2(MODE_IX);
- ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(resIdA, MODE_X));
- ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(resIdB, MODE_X));
+ ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(nullptr, resIdA, MODE_X));
+ ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(nullptr, resIdB, MODE_X));
// 1 -> 2
- ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(resIdB, MODE_X));
+ ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(nullptr, resIdB, MODE_X));
// 2 -> 1
- ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(resIdA, MODE_X));
+ ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(nullptr, resIdA, MODE_X));
DeadlockDetector wfg1(*getGlobalLockManager(), &locker1);
ASSERT(wfg1.check().hasCycle());
@@ -81,12 +81,12 @@ TEST(Deadlock, SimpleUpgrade) {
LockerForTests locker2(MODE_IX);
// Both acquire lock in intent mode
- ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(resId, MODE_IX));
- ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(resId, MODE_IX));
+ ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(nullptr, resId, MODE_IX));
+ ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(nullptr, resId, MODE_IX));
// Both try to upgrade
- ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(resId, MODE_X));
- ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(resId, MODE_X));
+ ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(nullptr, resId, MODE_X));
+ ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(nullptr, resId, MODE_X));
DeadlockDetector wfg1(*getGlobalLockManager(), &locker1);
ASSERT(wfg1.check().hasCycle());
@@ -107,17 +107,17 @@ TEST(Deadlock, Indirect) {
LockerForTests locker2(MODE_IX);
LockerForTests lockerIndirect(MODE_IX);
- ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(resIdA, MODE_X));
- ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(resIdB, MODE_X));
+ ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(nullptr, resIdA, MODE_X));
+ ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(nullptr, resIdB, MODE_X));
// 1 -> 2
- ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(resIdB, MODE_X));
+ ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(nullptr, resIdB, MODE_X));
// 2 -> 1
- ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(resIdA, MODE_X));
+ ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(nullptr, resIdA, MODE_X));
// 3 -> 2
- ASSERT_EQUALS(LOCK_WAITING, lockerIndirect.lockBegin(resIdA, MODE_X));
+ ASSERT_EQUALS(LOCK_WAITING, lockerIndirect.lockBegin(nullptr, resIdA, MODE_X));
DeadlockDetector wfg1(*getGlobalLockManager(), &locker1);
ASSERT(wfg1.check().hasCycle());
@@ -143,17 +143,17 @@ TEST(Deadlock, IndirectWithUpgrade) {
LockerForTests writer(MODE_IX);
// This sequence simulates the deadlock which occurs during flush
- ASSERT_EQUALS(LOCK_OK, writer.lockBegin(resIdFlush, MODE_IX));
- ASSERT_EQUALS(LOCK_OK, writer.lockBegin(resIdDb, MODE_X));
+ ASSERT_EQUALS(LOCK_OK, writer.lockBegin(nullptr, resIdFlush, MODE_IX));
+ ASSERT_EQUALS(LOCK_OK, writer.lockBegin(nullptr, resIdDb, MODE_X));
- ASSERT_EQUALS(LOCK_OK, reader.lockBegin(resIdFlush, MODE_IS));
+ ASSERT_EQUALS(LOCK_OK, reader.lockBegin(nullptr, resIdFlush, MODE_IS));
// R -> W
- ASSERT_EQUALS(LOCK_WAITING, reader.lockBegin(resIdDb, MODE_S));
+ ASSERT_EQUALS(LOCK_WAITING, reader.lockBegin(nullptr, resIdDb, MODE_S));
// R -> W
// F -> W
- ASSERT_EQUALS(LOCK_WAITING, flush.lockBegin(resIdFlush, MODE_S));
+ ASSERT_EQUALS(LOCK_WAITING, flush.lockBegin(nullptr, resIdFlush, MODE_S));
// W yields its flush lock, so now f is granted in mode S
//
@@ -164,14 +164,14 @@ TEST(Deadlock, IndirectWithUpgrade) {
//
// R -> W
// F -> R
- ASSERT_EQUALS(LOCK_WAITING, flush.lockBegin(resIdFlush, MODE_X));
+ ASSERT_EQUALS(LOCK_WAITING, flush.lockBegin(nullptr, resIdFlush, MODE_X));
// W comes back from the commit and tries to re-acquire the flush lock
//
// R -> W
// F -> R
// W -> F
- ASSERT_EQUALS(LOCK_WAITING, writer.lockBegin(resIdFlush, MODE_IX));
+ ASSERT_EQUALS(LOCK_WAITING, writer.lockBegin(nullptr, resIdFlush, MODE_IX));
// Run deadlock detection from the point of view of each of the involved lockers
DeadlockDetector wfgF(*getGlobalLockManager(), &flush);
diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp
index d7991ac1a26..790ba748a0b 100644
--- a/src/mongo/db/concurrency/lock_state.cpp
+++ b/src/mongo/db/concurrency/lock_state.cpp
@@ -357,7 +357,7 @@ LockResult LockerImpl<IsForMMAPV1>::_lockGlobalBegin(OperationContext* opCtx,
}
_modeForTicket = mode;
}
- const LockResult result = lockBegin(resourceIdGlobal, mode);
+ const LockResult result = lockBegin(opCtx, resourceIdGlobal, mode);
if (result == LOCK_OK)
return LOCK_OK;
@@ -479,7 +479,7 @@ template <bool IsForMMAPV1>
LockResult LockerImpl<IsForMMAPV1>::lock(
OperationContext* opCtx, ResourceId resId, LockMode mode, Date_t deadline, bool checkDeadlock) {
- const LockResult result = lockBegin(resId, mode);
+ const LockResult result = lockBegin(opCtx, resId, mode);
// Fast, uncontended path
if (result == LOCK_OK)
@@ -723,7 +723,9 @@ void LockerImpl<IsForMMAPV1>::restoreLockState(OperationContext* opCtx,
}
template <bool IsForMMAPV1>
-LockResult LockerImpl<IsForMMAPV1>::lockBegin(ResourceId resId, LockMode mode) {
+LockResult LockerImpl<IsForMMAPV1>::lockBegin(OperationContext* opCtx,
+ ResourceId resId,
+ LockMode mode) {
dassert(!getWaitingResource().isValid());
LockRequest* request;
@@ -788,6 +790,16 @@ LockResult LockerImpl<IsForMMAPV1>::lockBegin(ResourceId resId, LockMode mode) {
if (result == LOCK_WAITING) {
globalStats.recordWait(_id, resId, mode);
_stats.recordWait(resId, mode);
+ } else if (result == LOCK_OK && opCtx && _uninterruptibleLocksRequested == 0) {
+ // Lock acquisitions are not allowed to succeed when opCtx is marked as interrupted, unless
+ // the caller requested an uninterruptible lock.
+ auto interruptStatus = opCtx->checkForInterruptNoAssert();
+ if (!interruptStatus.isOK()) {
+ auto unlockIt = _requests.find(resId);
+ invariant(unlockIt);
+ _unlockImpl(&unlockIt);
+ uassertStatusOK(interruptStatus);
+ }
}
return result;
diff --git a/src/mongo/db/concurrency/lock_state.h b/src/mongo/db/concurrency/lock_state.h
index 2c31b941526..257b402024c 100644
--- a/src/mongo/db/concurrency/lock_state.h
+++ b/src/mongo/db/concurrency/lock_state.h
@@ -205,10 +205,14 @@ public:
* In other words for each call to lockBegin, which does not return LOCK_OK, there needs to
* be a corresponding call to either lockComplete or unlock.
*
+ * If an operation context is provided that represents an interrupted operation, lockBegin will
+ * throw an exception whenever it would have been possible to grant the lock with LOCK_OK. This
+ * behavior can be disabled with an UninterruptibleLockGuard.
+ *
* NOTE: These methods are not public and should only be used inside the class
* implementation and for unit-tests and not called directly.
*/
- LockResult lockBegin(ResourceId resId, LockMode mode);
+ LockResult lockBegin(OperationContext* opCtx, ResourceId resId, LockMode mode);
/**
* Waits for the completion of a lock, previously requested through lockBegin or
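A note on the new first parameter documented above: the unit-test hunks in this patch pass nullptr for the OperationContext, which skips the interruption check entirely, while production callers (lock_state.cpp above) forward their opCtx so that even a LOCK_OK grant is subject to interruption. A small sketch contrasting the two calls (the locker, resId, and opCtx variables are illustrative):

    // Test-style call: no OperationContext, so no interruption check is possible.
    LockResult a = locker.lockBegin(nullptr, resId, MODE_S);

    // Production-style call: a LOCK_OK grant is released and an Interrupted
    // exception is thrown if opCtx has been killed and no UninterruptibleLockGuard
    // is in scope.
    LockResult b = locker.lockBegin(opCtx, resId, MODE_S);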
diff --git a/src/mongo/db/concurrency/lock_state_test.cpp b/src/mongo/db/concurrency/lock_state_test.cpp
index 5e37fdf5e35..7e299d7cc7f 100644
--- a/src/mongo/db/concurrency/lock_state_test.cpp
+++ b/src/mongo/db/concurrency/lock_state_test.cpp
@@ -268,12 +268,12 @@ TEST(LockerImpl, CanceledDeadlockUnblocks) {
ASSERT(LOCK_OK == locker2.lock(db2, MODE_X));
// Set up locker1 and locker2 for deadlock
- ASSERT(LOCK_WAITING == locker1.lockBegin(db2, MODE_X));
- ASSERT(LOCK_WAITING == locker2.lockBegin(db1, MODE_X));
+ ASSERT(LOCK_WAITING == locker1.lockBegin(nullptr, db2, MODE_X));
+ ASSERT(LOCK_WAITING == locker2.lockBegin(nullptr, db1, MODE_X));
// Locker3 blocks behind locker 2
ASSERT(LOCK_OK == locker3.lockGlobal(MODE_IX));
- ASSERT(LOCK_WAITING == locker3.lockBegin(db1, MODE_S));
+ ASSERT(LOCK_WAITING == locker3.lockBegin(nullptr, db1, MODE_S));
// Detect deadlock, canceling our request
ASSERT(
@@ -442,7 +442,7 @@ TEST(LockerImpl, GetLockerInfoShouldReportPendingLocks) {
DefaultLockerImpl conflictingLocker;
ASSERT_EQ(LOCK_OK, conflictingLocker.lockGlobal(MODE_IS));
ASSERT_EQ(LOCK_OK, conflictingLocker.lock(dbId, MODE_IS));
- ASSERT_EQ(LOCK_WAITING, conflictingLocker.lockBegin(collectionId, MODE_IS));
+ ASSERT_EQ(LOCK_WAITING, conflictingLocker.lockBegin(nullptr, collectionId, MODE_IS));
// Assert the held locks show up in the output of getLockerInfo().
Locker::LockerInfo lockerInfo;
diff --git a/src/mongo/db/concurrency/lock_stats_test.cpp b/src/mongo/db/concurrency/lock_stats_test.cpp
index 1d064a23fcb..53237a5e0f0 100644
--- a/src/mongo/db/concurrency/lock_stats_test.cpp
+++ b/src/mongo/db/concurrency/lock_stats_test.cpp
@@ -63,7 +63,7 @@ TEST(LockStats, Wait) {
{
// This will block
LockerForTests lockerConflict(MODE_IX);
- ASSERT_EQUALS(LOCK_WAITING, lockerConflict.lockBegin(resId, MODE_S));
+ ASSERT_EQUALS(LOCK_WAITING, lockerConflict.lockBegin(nullptr, resId, MODE_S));
// Sleep 1 millisecond so the wait time passes
ASSERT_EQUALS(
diff --git a/src/mongo/db/introspect.cpp b/src/mongo/db/introspect.cpp
index aa0cb13e2e9..c48ab2089eb 100644
--- a/src/mongo/db/introspect.cpp
+++ b/src/mongo/db/introspect.cpp
@@ -116,6 +116,11 @@ void profile(OperationContext* opCtx, NetworkOp op) {
const string dbName(nsToDatabase(CurOp::get(opCtx)->getNS()));
try {
+ // Even if the operation we are profiling was interrupted, we still want to output the
+ // profiler entry. This lock guard will prevent lock acquisitions from throwing exceptions
+ // before we finish writing the entry.
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
bool acquireDbXLock = false;
while (true) {
std::unique_ptr<AutoGetDb> autoGetDb;
diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp
index 148bc2231d1..7edfd3d09d4 100644
--- a/src/mongo/db/ops/write_ops_exec.cpp
+++ b/src/mongo/db/ops/write_ops_exec.cpp
@@ -85,6 +85,7 @@ namespace {
MONGO_FP_DECLARE(failAllInserts);
MONGO_FP_DECLARE(failAllUpdates);
MONGO_FP_DECLARE(failAllRemoves);
+MONGO_FP_DECLARE(hangDuringBatchInsert);
void updateRetryStats(OperationContext* opCtx, bool containsRetry) {
if (containsRetry) {
@@ -382,7 +383,9 @@ bool insertBatchAndHandleErrors(OperationContext* opCtx,
boost::optional<AutoGetCollection> collection;
auto acquireCollection = [&] {
while (true) {
- opCtx->checkForInterrupt();
+ if (MONGO_FAIL_POINT(hangDuringBatchInsert)) {
+ MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangDuringBatchInsert);
+ }
if (MONGO_FAIL_POINT(failAllInserts)) {
uasserted(ErrorCodes::InternalError, "failAllInserts failpoint active!");
@@ -620,7 +623,6 @@ static SingleWriteResult performSingleUpdateOp(OperationContext* opCtx,
boost::optional<AutoGetCollection> collection;
while (true) {
- opCtx->checkForInterrupt();
if (MONGO_FAIL_POINT(failAllUpdates)) {
uasserted(ErrorCodes::InternalError, "failAllUpdates failpoint active!");
}
@@ -776,8 +778,6 @@ static SingleWriteResult performSingleDeleteOp(OperationContext* opCtx,
ParsedDelete parsedDelete(opCtx, &request);
uassertStatusOK(parsedDelete.parseRequest());
- opCtx->checkForInterrupt();
-
if (MONGO_FAIL_POINT(failAllRemoves)) {
uasserted(ErrorCodes::InternalError, "failAllRemoves failpoint active!");
}
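The hunks above replace the explicit per-batch opCtx->checkForInterrupt() calls, which are now redundant because lock acquisition itself throws for interrupted operations (see lock_state.cpp above), and add the hangDuringBatchInsert fail point that the new jstest drives. With {configureFailPoint: "hangDuringBatchInsert", mode: {skip: 5}}, the first five evaluations of MONGO_FAIL_POINT() return false, so five batches insert normally; subsequent evaluations return true and the pause macro spins until the test sets the fail point's mode to "off". A condensed sketch of the server-side check (the surrounding collection-acquisition loop is elided):

    // Evaluated at the top of each collection-acquisition attempt in
    // insertBatchAndHandleErrors (roughly once per insert batch).
    if (MONGO_FAIL_POINT(hangDuringBatchInsert)) {
        // Blocks the inserting thread until the test turns the fail point "off".
        MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangDuringBatchInsert);
    }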
diff --git a/src/mongo/db/repair_database.cpp b/src/mongo/db/repair_database.cpp
index 2ff3eb85056..93773d7fdc4 100644
--- a/src/mongo/db/repair_database.cpp
+++ b/src/mongo/db/repair_database.cpp
@@ -269,6 +269,9 @@ Status repairDatabase(OperationContext* opCtx,
dbHolder().close(opCtx, dbName, "database closed for repair");
ON_BLOCK_EXIT([&dbName, &opCtx] {
try {
+ // Ensure that we don't trigger an exception when attempting to take locks.
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
// Open the db after everything finishes.
auto db = dbHolder().openDb(opCtx, dbName);
diff --git a/src/mongo/db/repl/noop_writer.cpp b/src/mongo/db/repl/noop_writer.cpp
index 7496baf3179..12af44fb559 100644
--- a/src/mongo/db/repl/noop_writer.cpp
+++ b/src/mongo/db/repl/noop_writer.cpp
@@ -141,6 +141,9 @@ void NoopWriter::stopWritingPeriodicNoops() {
}
void NoopWriter::_writeNoop(OperationContext* opCtx) {
+ // Ensure that we don't trigger an exception when attempting to take locks.
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
// Use GlobalLock + lockMMAPV1Flush instead of DBLock to allow return when the lock is not
// available. It may happen when the primary steps down and a shared global lock is acquired.
Lock::GlobalLock lock(opCtx, MODE_IX, Date_t::now() + Milliseconds(1));
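For context on the deadline here: a GlobalLock constructed with a short deadline does not block indefinitely when the primary is stepping down and holds the global lock; the noop writer is expected to check whether the lock was actually acquired and skip the write otherwise. A sketch of that pattern (the isLocked() check and early return are an assumption about the unchanged code that follows this hunk, not part of the diff):

    UninterruptibleLockGuard noInterrupt(opCtx->lockState());

    // Try for at most one millisecond; give up on this noop write if the
    // global lock is unavailable (e.g. a stepdown holds it exclusively).
    Lock::GlobalLock lock(opCtx, MODE_IX, Date_t::now() + Milliseconds(1));
    if (!lock.isLocked()) {   // assumed accessor; not shown in this diff
        return;               // skip this round of noop writing
    }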
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
index 220e2658a35..6eea5422c0a 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
@@ -538,6 +538,9 @@ void MigrationChunkClonerSourceLegacy::_cleanup(OperationContext* opCtx) {
}
if (_deleteNotifyExec) {
+ // Don't allow an Interrupt exception to prevent _deleteNotifyExec from getting cleaned up.
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
AutoGetCollection autoColl(opCtx, _args.getNss(), MODE_IS);
const auto cursorManager =
autoColl.getCollection() ? autoColl.getCollection()->getCursorManager() : nullptr;
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index fb67d0e5ff6..01fafcf60ce 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -1143,6 +1143,9 @@ DbResponse receivedGetMore(OperationContext* opCtx,
getMore(opCtx, ns, ntoreturn, cursorid, &exhaust, &isCursorAuthorized);
} catch (AssertionException& e) {
if (isCursorAuthorized) {
+ // Make sure that killCursorGlobal does not throw an exception if it is interrupted.
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
// If a cursor with id 'cursorid' was authorized, it may have been advanced
// before an exception terminated processGetMore. Erase the ClientCursor
// because it may now be out of sync with the client's iteration state.