author    | Justin Seyster <justin.seyster@mongodb.com> | 2018-03-30 15:25:47 -0400
committer | Justin Seyster <justin.seyster@mongodb.com> | 2018-04-18 16:41:30 -0400
commit    | d9a5a306690d7cdb8831e64441a66cdd503d8064 (patch)
tree      | 9d28b12e88351a4efab5b9d883f9a79ac2b2044a
parent    | cf34f10d361240f4a94f5fae94cb55bc2c0acf9b (diff)
download  | mongo-d9a5a306690d7cdb8831e64441a66cdd503d8064.tar.gz
SERVER-27534 All writing operations must fail if the term changes.
This reapplies bc19d43f, which was reverted by ae50776b. It also adds
more test fixes.
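
In practice, the change means that once an operation is marked killed (for example, because the replication term changed mid-batch), any further lock acquisition on its OperationContext fails with ErrorCodes::Interrupted unless the call site explicitly opts out. A minimal sketch of that contract, modeled directly on the new d_concurrency_test.cpp cases in this diff (makeKClientsWithLockers and the Lock types come from that test fixture):

    // An interrupted operation can no longer take locks, even uncontested ones.
    auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
    auto opCtx = clients[0].second.get();

    opCtx->markKilled();  // Simulate an interrupted operation (e.g. a term change).

    boost::optional<Lock::GlobalRead> globalReadLock;
    ASSERT_THROWS_CODE(
        globalReadLock.emplace(opCtx, Date_t::now()), AssertionException, ErrorCodes::Interrupted);

    // Call sites that must take locks even while interrupted (shutdown, cleanup,
    // profiling) opt out with an UninterruptibleLockGuard.
    {
        UninterruptibleLockGuard noInterrupt(opCtx->lockState());
        Lock::GlobalRead lockTakenAnyway(opCtx, Date_t::now());  // Does not throw.
    }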
-rw-r--r-- | buildscripts/resmokeconfig/suites/replica_sets_auth.yml |   2
-rw-r--r-- | jstests/replsets/interrupted_batch_insert.js            | 126
-rw-r--r-- | src/mongo/client/embedded/embedded.cpp                  |   1
-rw-r--r-- | src/mongo/db/catalog/rename_collection.cpp              |   3
-rw-r--r-- | src/mongo/db/commands/getmore_cmd.cpp                   |   3
-rw-r--r-- | src/mongo/db/concurrency/d_concurrency_test.cpp         |  74
-rw-r--r-- | src/mongo/db/concurrency/deadlock_detection_test.cpp    |  44
-rw-r--r-- | src/mongo/db/concurrency/lock_state.cpp                 |  18
-rw-r--r-- | src/mongo/db/concurrency/lock_state.h                   |   6
-rw-r--r-- | src/mongo/db/concurrency/lock_state_test.cpp            |   8
-rw-r--r-- | src/mongo/db/concurrency/lock_stats_test.cpp            |   2
-rw-r--r-- | src/mongo/db/introspect.cpp                             |   5
-rw-r--r-- | src/mongo/db/ops/write_ops_exec.cpp                     |   8
-rw-r--r-- | src/mongo/db/repair_database.cpp                        |   3
-rw-r--r-- | src/mongo/db/repl/noop_writer.cpp                       |   3
-rw-r--r-- | src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp |   3
-rw-r--r-- | src/mongo/db/service_entry_point_common.cpp             |   3
17 files changed, 277 insertions, 35 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_auth.yml b/buildscripts/resmokeconfig/suites/replica_sets_auth.yml
index 864230bbce7..9bb2e5b1602 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_auth.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_auth.yml
@@ -11,6 +11,8 @@ selector:
   exclude_files:
   # Skip any tests that run with auth explicitly.
   - jstests/replsets/*[aA]uth*.js
+  # Also skip tests that require a ScopedThread, because ScopedThreads don't inherit credentials.
+  - jstests/replsets/interrupted_batch_insert.js

 executor:
   config:
diff --git a/jstests/replsets/interrupted_batch_insert.js b/jstests/replsets/interrupted_batch_insert.js
new file mode 100644
index 00000000000..b55214af05f
--- /dev/null
+++ b/jstests/replsets/interrupted_batch_insert.js
@@ -0,0 +1,126 @@
+// Tests the scenario described in SERVER-27534.
+// 1. Send a single insert command with a large number of documents and the {ordered: true} option.
+// 2. Force the thread processing the insert command to hang in between insert batches. (Inserts are
+//    typically split into batches of 64, and the server yields locks between batches.)
+// 3. Disconnect the original primary from the network, forcing another node to step up.
+// 4. Insert a single document on the new primary.
+// 5. Return the original primary to the network and force it to step up by disconnecting the
+//    primary that replaced it. The original primary has to roll back any batches from step 1
+//    that were inserted locally but did not get majority committed before the insert in step 4.
+// 6. Unpause the thread performing the insert from step 1. If it continues to
+//    insert batches even though there was a rollback, those inserts will
+//    violate the {ordered: true} option.
+
+load('jstests/libs/parallelTester.js');
+load("jstests/replsets/rslib.js");
+
+(function() {
+    "use strict";
+
+    var name = "interrupted_batch_insert";
+    var replTest = new ReplSetTest({name: name, nodes: 3, useBridge: true});
+    var nodes = replTest.nodeList();
+
+    var conns = replTest.startSet();
+    replTest.initiate({
+        _id: name,
+        members: [
+            {_id: 0, host: nodes[0]},
+            {_id: 1, host: nodes[1]},
+            {_id: 2, host: nodes[2], priority: 0}
+        ]
+    });
+
+    // The test starts with node 0 as the primary.
+    replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY);
+    var primary = replTest.nodes[0];
+    var collName = primary.getDB("db")[name].getFullName();
+
+    var getParameterResult =
+        primary.getDB("admin").runCommand({getParameter: 1, internalInsertMaxBatchSize: 1});
+    assert.commandWorked(getParameterResult);
+    const batchSize = getParameterResult.internalInsertMaxBatchSize;
+
+    // Prevent node 1 from getting any data from the node 0 oplog.
+    conns[0].disconnect(conns[1]);
+
+    // Allow the primary to insert the first 5 batches of documents. After that, the fail point
+    // activates, and the client thread hangs until the fail point gets turned off.
+    assert.commandWorked(primary.getDB("db").adminCommand(
+        {configureFailPoint: "hangDuringBatchInsert", mode: {skip: 5}}));
+
+    // In a background thread, issue an insert command to the primary that will insert 10 batches of
+    // documents.
+    var worker = new ScopedThread((host, collName, numToInsert) => {
+        // Insert elements [{idx: 0}, {idx: 1}, ..., {idx: numToInsert - 1}].
+        const docsToInsert = Array.from({length: numToInsert}, (_, i) => {
+            return {idx: i};
+        });
+        var coll = new Mongo(host).getCollection(collName);
+        assert.throws(
+            () => coll.insert(docsToInsert,
+                              {writeConcern: {w: "majority", wtimeout: 5000}, ordered: true}),
+            [],
+            "network error");
+    }, primary.host, collName, 10 * batchSize);
+    worker.start();
+
+    // Wait long enough to guarantee that all 5 batches of inserts have executed and the primary is
+    // hung on the "hangDuringBatchInsert" fail point.
+    sleep(1000);
+
+    // Make sure the insert command is, in fact, running in the background.
+    assert.eq(primary.getDB("db").currentOp({"command.insert": name, active: true}).inprog.length,
+              1);
+
+    // Completely isolate the current primary (node 0), forcing it to step down.
+    conns[0].disconnect(conns[2]);
+
+    // Wait for node 1, the only other eligible node, to become the new primary.
+    replTest.waitForState(replTest.nodes[1], ReplSetTest.State.PRIMARY);
+
+    // Wait for node 2 to acknowledge node 1 as the new primary.
+    replTest.awaitSyncSource(replTest.nodes[2], replTest.nodes[1]);
+
+    // Issue a write to the new primary.
+    var collOnNewPrimary = replTest.nodes[1].getCollection(collName);
+    assert.writeOK(collOnNewPrimary.insert({singleDoc: 1}, {writeConcern: {w: "majority"}}));
+
+    // Isolate node 1, forcing it to step down as primary, and reconnect node 0, allowing it to
+    // step up again.
+    conns[0].reconnect(conns[2]);
+    conns[1].disconnect(conns[2]);
+
+    // Wait for node 0 to become primary again.
+    replTest.waitForState(primary, ReplSetTest.State.PRIMARY);
+
+    // Wait until node 2 recognizes node 0 as primary.
+    replTest.awaitSyncSource(replTest.nodes[2], primary);
+
+    // Allow the batch insert to continue.
+    assert.commandWorked(primary.getDB("db").adminCommand(
+        {configureFailPoint: "hangDuringBatchInsert", mode: "off"}));
+
+    // Wait until the insert command is done.
+    assert.soon(
+        () =>
+            primary.getDB("db").currentOp({"command.insert": name, active: true}).inprog.length ===
+            0);
+
+    worker.join();
+
+    var docs = primary.getDB("db")[name].find({idx: {$exists: 1}}).sort({idx: 1}).toArray();
+
+    // Any discontinuity in the "idx" values is an error. If an "idx" document failed to insert,
+    // all of the "idx" documents after it should also have failed to insert, because the insert
+    // specified {ordered: 1}. Note, if none of the inserts were successful, that's fine.
+    docs.forEach((element, index) => {
+        assert.eq(element.idx, index);
+    });
+
+    // Reconnect the remaining disconnected nodes, so we can exit.
+    conns[0].reconnect(conns[1]);
+    conns[1].reconnect(conns[2]);
+
+    replTest.stopSet(15);
+}());
diff --git a/src/mongo/client/embedded/embedded.cpp b/src/mongo/client/embedded/embedded.cpp
index 9f99b33d253..6ad81043143 100644
--- a/src/mongo/client/embedded/embedded.cpp
+++ b/src/mongo/client/embedded/embedded.cpp
@@ -145,6 +145,7 @@ void shutdown(ServiceContext* srvContext) {
     // Close all open databases, shutdown storage engine and run all deinitializers.
     auto shutdownOpCtx = serviceContext->makeOperationContext(client);
     {
+        UninterruptibleLockGuard noInterrupt(shutdownOpCtx->lockState());
         Lock::GlobalLock lk(shutdownOpCtx.get(), MODE_X, Date_t::max());
         dbHolder().closeAll(shutdownOpCtx.get(), "shutdown");
diff --git a/src/mongo/db/catalog/rename_collection.cpp b/src/mongo/db/catalog/rename_collection.cpp
index 8aa634a14c5..4d6d08753a5 100644
--- a/src/mongo/db/catalog/rename_collection.cpp
+++ b/src/mongo/db/catalog/rename_collection.cpp
@@ -332,6 +332,9 @@ Status renameCollectionCommon(OperationContext* opCtx,

     // Dismissed on success
     auto tmpCollectionDropper = MakeGuard([&] {
+        // Ensure that we don't trigger an exception when attempting to take locks.
+        UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
         BSONObjBuilder unusedResult;
         auto status =
             dropCollection(opCtx,
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index 83188a92793..9f2904a971d 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -290,6 +290,9 @@ public:

         // Only used by the failpoints.
         const auto dropAndReaquireReadLock = [&readLock, opCtx, &request]() {
+            // Make sure an interrupted operation does not prevent us from reacquiring the lock.
+            UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
             readLock.reset();
             readLock.emplace(opCtx, request.nss);
         };
diff --git a/src/mongo/db/concurrency/d_concurrency_test.cpp b/src/mongo/db/concurrency/d_concurrency_test.cpp
index 5c04128a022..5cddeda4c74 100644
--- a/src/mongo/db/concurrency/d_concurrency_test.cpp
+++ b/src/mongo/db/concurrency/d_concurrency_test.cpp
@@ -1176,6 +1176,80 @@ TEST_F(DConcurrencyTestFixture, TicketReacquireCanBeInterrupted) {
     ASSERT_THROWS_CODE(result.get(), AssertionException, ErrorCodes::Interrupted);
 }

+TEST_F(DConcurrencyTestFixture, GlobalLockInInterruptedContextThrowsEvenWhenUncontested) {
+    auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
+    auto opCtx = clients[0].second.get();
+
+    opCtx->markKilled();
+
+    boost::optional<Lock::GlobalRead> globalReadLock;
+    ASSERT_THROWS_CODE(
+        globalReadLock.emplace(opCtx, Date_t::now()), AssertionException, ErrorCodes::Interrupted);
+}
+
+TEST_F(DConcurrencyTestFixture, GlobalLockInInterruptedContextThrowsEvenAcquiringRecursively) {
+    auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
+    auto opCtx = clients[0].second.get();
+
+    Lock::GlobalWrite globalWriteLock(opCtx, Date_t::now());
+
+    opCtx->markKilled();
+
+    {
+        boost::optional<Lock::GlobalWrite> recursiveGlobalWriteLock;
+        ASSERT_THROWS_CODE(recursiveGlobalWriteLock.emplace(opCtx, Date_t::now()),
+                           AssertionException,
+                           ErrorCodes::Interrupted);
+    }
+}
+
+TEST_F(DConcurrencyTestFixture, GlobalLockInInterruptedContextRespectsUninterruptibleGuard) {
+    auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
+    auto opCtx = clients[0].second.get();
+
+    opCtx->markKilled();
+
+    UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+    Lock::GlobalRead globalReadLock(opCtx, Date_t::now());  // Does not throw.
+}
+
+TEST_F(DConcurrencyTestFixture, DBLockInInterruptedContextThrowsEvenWhenUncontested) {
+    auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
+    auto opCtx = clients[0].second.get();
+
+    opCtx->markKilled();
+
+    boost::optional<Lock::DBLock> dbWriteLock;
+    ASSERT_THROWS_CODE(
+        dbWriteLock.emplace(opCtx, "db", MODE_IX), AssertionException, ErrorCodes::Interrupted);
+}
+
+TEST_F(DConcurrencyTestFixture, DBLockInInterruptedContextThrowsEvenWhenAcquiringRecursively) {
+    auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
+    auto opCtx = clients[0].second.get();
+
+    Lock::DBLock dbWriteLock(opCtx, "db", MODE_X);
+
+    opCtx->markKilled();
+
+    {
+        boost::optional<Lock::DBLock> recursiveDBWriteLock;
+        ASSERT_THROWS_CODE(recursiveDBWriteLock.emplace(opCtx, "db", MODE_X),
+                           AssertionException,
+                           ErrorCodes::Interrupted);
+    }
+}
+
+TEST_F(DConcurrencyTestFixture, DBLockInInterruptedContextRespectsUninterruptibleGuard) {
+    auto clients = makeKClientsWithLockers<DefaultLockerImpl>(1);
+    auto opCtx = clients[0].second.get();
+
+    opCtx->markKilled();
+
+    UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+    Lock::DBLock dbWriteLock(opCtx, "db", MODE_X);  // Does not throw.
+}
+
 TEST_F(DConcurrencyTestFixture, DBLockTimeout) {
     auto clientOpctxPairs = makeKClientsWithLockers<DefaultLockerImpl>(2);
     auto opctx1 = clientOpctxPairs[0].second.get();
diff --git a/src/mongo/db/concurrency/deadlock_detection_test.cpp b/src/mongo/db/concurrency/deadlock_detection_test.cpp
index ce29fd37b01..ead27261e1a 100644
--- a/src/mongo/db/concurrency/deadlock_detection_test.cpp
+++ b/src/mongo/db/concurrency/deadlock_detection_test.cpp
@@ -37,8 +37,8 @@ TEST(Deadlock, NoDeadlock) {
     LockerForTests locker1(MODE_IS);
     LockerForTests locker2(MODE_IS);

-    ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(resId, MODE_S));
-    ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(resId, MODE_S));
+    ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(nullptr, resId, MODE_S));
+    ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(nullptr, resId, MODE_S));

     DeadlockDetector wfg1(*getGlobalLockManager(), &locker1);
     ASSERT(!wfg1.check().hasCycle());
@@ -54,14 +54,14 @@ TEST(Deadlock, Simple) {
     LockerForTests locker1(MODE_IX);
     LockerForTests locker2(MODE_IX);

-    ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(resIdA, MODE_X));
-    ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(resIdB, MODE_X));
+    ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(nullptr, resIdA, MODE_X));
+    ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(nullptr, resIdB, MODE_X));

     // 1 -> 2
-    ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(resIdB, MODE_X));
+    ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(nullptr, resIdB, MODE_X));

     // 2 -> 1
-    ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(resIdA, MODE_X));
+    ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(nullptr, resIdA, MODE_X));

     DeadlockDetector wfg1(*getGlobalLockManager(), &locker1);
     ASSERT(wfg1.check().hasCycle());
@@ -81,12 +81,12 @@ TEST(Deadlock, SimpleUpgrade) {
     LockerForTests locker2(MODE_IX);

     // Both acquire lock in intent mode
-    ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(resId, MODE_IX));
-    ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(resId, MODE_IX));
+    ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(nullptr, resId, MODE_IX));
+    ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(nullptr, resId, MODE_IX));

     // Both try to upgrade
-    ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(resId, MODE_X));
-    ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(resId, MODE_X));
+    ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(nullptr, resId, MODE_X));
+    ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(nullptr, resId, MODE_X));

     DeadlockDetector wfg1(*getGlobalLockManager(), &locker1);
     ASSERT(wfg1.check().hasCycle());
@@ -107,17 +107,17 @@ TEST(Deadlock, Indirect) {
     LockerForTests locker2(MODE_IX);
     LockerForTests lockerIndirect(MODE_IX);

-    ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(resIdA, MODE_X));
-    ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(resIdB, MODE_X));
+    ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(nullptr, resIdA, MODE_X));
+    ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(nullptr, resIdB, MODE_X));

     // 1 -> 2
-    ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(resIdB, MODE_X));
+    ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(nullptr, resIdB, MODE_X));

     // 2 -> 1
-    ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(resIdA, MODE_X));
+    ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(nullptr, resIdA, MODE_X));

     // 3 -> 2
-    ASSERT_EQUALS(LOCK_WAITING, lockerIndirect.lockBegin(resIdA, MODE_X));
+    ASSERT_EQUALS(LOCK_WAITING, lockerIndirect.lockBegin(nullptr, resIdA, MODE_X));

     DeadlockDetector wfg1(*getGlobalLockManager(), &locker1);
     ASSERT(wfg1.check().hasCycle());
@@ -143,17 +143,17 @@ TEST(Deadlock, IndirectWithUpgrade) {
     LockerForTests writer(MODE_IX);

     // This sequence simulates the deadlock which occurs during flush
-    ASSERT_EQUALS(LOCK_OK, writer.lockBegin(resIdFlush, MODE_IX));
-    ASSERT_EQUALS(LOCK_OK, writer.lockBegin(resIdDb, MODE_X));
+    ASSERT_EQUALS(LOCK_OK, writer.lockBegin(nullptr, resIdFlush, MODE_IX));
+    ASSERT_EQUALS(LOCK_OK, writer.lockBegin(nullptr, resIdDb, MODE_X));

-    ASSERT_EQUALS(LOCK_OK, reader.lockBegin(resIdFlush, MODE_IS));
+    ASSERT_EQUALS(LOCK_OK, reader.lockBegin(nullptr, resIdFlush, MODE_IS));

     // R -> W
-    ASSERT_EQUALS(LOCK_WAITING, reader.lockBegin(resIdDb, MODE_S));
+    ASSERT_EQUALS(LOCK_WAITING, reader.lockBegin(nullptr, resIdDb, MODE_S));

     // R -> W
     // F -> W
-    ASSERT_EQUALS(LOCK_WAITING, flush.lockBegin(resIdFlush, MODE_S));
+    ASSERT_EQUALS(LOCK_WAITING, flush.lockBegin(nullptr, resIdFlush, MODE_S));

     // W yields its flush lock, so now f is granted in mode S
     //
@@ -164,14 +164,14 @@ TEST(Deadlock, IndirectWithUpgrade) {
     //
     // R -> W
     // F -> R
-    ASSERT_EQUALS(LOCK_WAITING, flush.lockBegin(resIdFlush, MODE_X));
+    ASSERT_EQUALS(LOCK_WAITING, flush.lockBegin(nullptr, resIdFlush, MODE_X));

     // W comes back from the commit and tries to re-acquire the flush lock
     //
     // R -> W
     // F -> R
     // W -> F
-    ASSERT_EQUALS(LOCK_WAITING, writer.lockBegin(resIdFlush, MODE_IX));
+    ASSERT_EQUALS(LOCK_WAITING, writer.lockBegin(nullptr, resIdFlush, MODE_IX));

     // Run deadlock detection from the point of view of each of the involved lockers
     DeadlockDetector wfgF(*getGlobalLockManager(), &flush);
diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp
index d7991ac1a26..790ba748a0b 100644
--- a/src/mongo/db/concurrency/lock_state.cpp
+++ b/src/mongo/db/concurrency/lock_state.cpp
@@ -357,7 +357,7 @@ LockResult LockerImpl<IsForMMAPV1>::_lockGlobalBegin(OperationContext* opCtx,
         }
         _modeForTicket = mode;
     }
-    const LockResult result = lockBegin(resourceIdGlobal, mode);
+    const LockResult result = lockBegin(opCtx, resourceIdGlobal, mode);
     if (result == LOCK_OK)
         return LOCK_OK;

@@ -479,7 +479,7 @@ template <bool IsForMMAPV1>
 LockResult LockerImpl<IsForMMAPV1>::lock(
     OperationContext* opCtx, ResourceId resId, LockMode mode, Date_t deadline, bool checkDeadlock) {

-    const LockResult result = lockBegin(resId, mode);
+    const LockResult result = lockBegin(opCtx, resId, mode);

     // Fast, uncontended path
     if (result == LOCK_OK)
@@ -723,7 +723,9 @@ void LockerImpl<IsForMMAPV1>::restoreLockState(OperationContext* opCtx,
 }

 template <bool IsForMMAPV1>
-LockResult LockerImpl<IsForMMAPV1>::lockBegin(ResourceId resId, LockMode mode) {
+LockResult LockerImpl<IsForMMAPV1>::lockBegin(OperationContext* opCtx,
+                                              ResourceId resId,
+                                              LockMode mode) {
     dassert(!getWaitingResource().isValid());

     LockRequest* request;
@@ -788,6 +790,16 @@ LockResult LockerImpl<IsForMMAPV1>::lockBegin(ResourceId resId, LockMode mode) {
     if (result == LOCK_WAITING) {
         globalStats.recordWait(_id, resId, mode);
         _stats.recordWait(resId, mode);
+    } else if (result == LOCK_OK && opCtx && _uninterruptibleLocksRequested == 0) {
+        // Lock acquisitions are not allowed to succeed when opCtx is marked as interrupted, unless
+        // the caller requested an uninterruptible lock.
+        auto interruptStatus = opCtx->checkForInterruptNoAssert();
+        if (!interruptStatus.isOK()) {
+            auto unlockIt = _requests.find(resId);
+            invariant(unlockIt);
+            _unlockImpl(&unlockIt);
+            uassertStatusOK(interruptStatus);
+        }
     }

     return result;
diff --git a/src/mongo/db/concurrency/lock_state.h b/src/mongo/db/concurrency/lock_state.h
index 2c31b941526..257b402024c 100644
--- a/src/mongo/db/concurrency/lock_state.h
+++ b/src/mongo/db/concurrency/lock_state.h
@@ -205,10 +205,14 @@ public:
      * In other words for each call to lockBegin, which does not return LOCK_OK, there needs to
      * be a corresponding call to either lockComplete or unlock.
      *
+     * If an operation context is provided that represents an interrupted operation, lockBegin will
+     * throw an exception whenever it would have been possible to grant the lock with LOCK_OK. This
+     * behavior can be disabled with an UninterruptibleLockGuard.
+     *
      * NOTE: These methods are not public and should only be used inside the class
      * implementation and for unit-tests and not called directly.
      */
-    LockResult lockBegin(ResourceId resId, LockMode mode);
+    LockResult lockBegin(OperationContext* opCtx, ResourceId resId, LockMode mode);

     /**
      * Waits for the completion of a lock, previously requested through lockBegin or
diff --git a/src/mongo/db/concurrency/lock_state_test.cpp b/src/mongo/db/concurrency/lock_state_test.cpp
index 5e37fdf5e35..7e299d7cc7f 100644
--- a/src/mongo/db/concurrency/lock_state_test.cpp
+++ b/src/mongo/db/concurrency/lock_state_test.cpp
@@ -268,12 +268,12 @@ TEST(LockerImpl, CanceledDeadlockUnblocks) {
     ASSERT(LOCK_OK == locker2.lock(db2, MODE_X));

     // Set up locker1 and locker2 for deadlock
-    ASSERT(LOCK_WAITING == locker1.lockBegin(db2, MODE_X));
-    ASSERT(LOCK_WAITING == locker2.lockBegin(db1, MODE_X));
+    ASSERT(LOCK_WAITING == locker1.lockBegin(nullptr, db2, MODE_X));
+    ASSERT(LOCK_WAITING == locker2.lockBegin(nullptr, db1, MODE_X));

     // Locker3 blocks behind locker 2
     ASSERT(LOCK_OK == locker3.lockGlobal(MODE_IX));
-    ASSERT(LOCK_WAITING == locker3.lockBegin(db1, MODE_S));
+    ASSERT(LOCK_WAITING == locker3.lockBegin(nullptr, db1, MODE_S));

     // Detect deadlock, canceling our request
     ASSERT(
@@ -442,7 +442,7 @@ TEST(LockerImpl, GetLockerInfoShouldReportPendingLocks) {
     DefaultLockerImpl conflictingLocker;
     ASSERT_EQ(LOCK_OK, conflictingLocker.lockGlobal(MODE_IS));
     ASSERT_EQ(LOCK_OK, conflictingLocker.lock(dbId, MODE_IS));
-    ASSERT_EQ(LOCK_WAITING, conflictingLocker.lockBegin(collectionId, MODE_IS));
+    ASSERT_EQ(LOCK_WAITING, conflictingLocker.lockBegin(nullptr, collectionId, MODE_IS));

     // Assert the held locks show up in the output of getLockerInfo().
     Locker::LockerInfo lockerInfo;
diff --git a/src/mongo/db/concurrency/lock_stats_test.cpp b/src/mongo/db/concurrency/lock_stats_test.cpp
index 1d064a23fcb..53237a5e0f0 100644
--- a/src/mongo/db/concurrency/lock_stats_test.cpp
+++ b/src/mongo/db/concurrency/lock_stats_test.cpp
@@ -63,7 +63,7 @@ TEST(LockStats, Wait) {
     {
         // This will block
         LockerForTests lockerConflict(MODE_IX);
-        ASSERT_EQUALS(LOCK_WAITING, lockerConflict.lockBegin(resId, MODE_S));
+        ASSERT_EQUALS(LOCK_WAITING, lockerConflict.lockBegin(nullptr, resId, MODE_S));

         // Sleep 1 millisecond so the wait time passes
         ASSERT_EQUALS(
diff --git a/src/mongo/db/introspect.cpp b/src/mongo/db/introspect.cpp
index aa0cb13e2e9..c48ab2089eb 100644
--- a/src/mongo/db/introspect.cpp
+++ b/src/mongo/db/introspect.cpp
@@ -116,6 +116,11 @@ void profile(OperationContext* opCtx, NetworkOp op) {
     const string dbName(nsToDatabase(CurOp::get(opCtx)->getNS()));

     try {
+        // Even if the operation we are profiling was interrupted, we still want to output the
+        // profiler entry. This lock guard will prevent lock acquisitions from throwing exceptions
+        // before we finish writing the entry.
+        UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
         bool acquireDbXLock = false;
         while (true) {
             std::unique_ptr<AutoGetDb> autoGetDb;
diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp
index 148bc2231d1..7edfd3d09d4 100644
--- a/src/mongo/db/ops/write_ops_exec.cpp
+++ b/src/mongo/db/ops/write_ops_exec.cpp
@@ -85,6 +85,7 @@ namespace {
 MONGO_FP_DECLARE(failAllInserts);
 MONGO_FP_DECLARE(failAllUpdates);
 MONGO_FP_DECLARE(failAllRemoves);
+MONGO_FP_DECLARE(hangDuringBatchInsert);

 void updateRetryStats(OperationContext* opCtx, bool containsRetry) {
     if (containsRetry) {
@@ -382,7 +383,9 @@ bool insertBatchAndHandleErrors(OperationContext* opCtx,
     boost::optional<AutoGetCollection> collection;
     auto acquireCollection = [&] {
         while (true) {
-            opCtx->checkForInterrupt();
+            if (MONGO_FAIL_POINT(hangDuringBatchInsert)) {
+                MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangDuringBatchInsert);
+            }

             if (MONGO_FAIL_POINT(failAllInserts)) {
                 uasserted(ErrorCodes::InternalError, "failAllInserts failpoint active!");
@@ -620,7 +623,6 @@ static SingleWriteResult performSingleUpdateOp(OperationContext* opCtx,

     boost::optional<AutoGetCollection> collection;
     while (true) {
-        opCtx->checkForInterrupt();
         if (MONGO_FAIL_POINT(failAllUpdates)) {
             uasserted(ErrorCodes::InternalError, "failAllUpdates failpoint active!");
         }
@@ -776,8 +778,6 @@ static SingleWriteResult performSingleDeleteOp(OperationContext* opCtx,
     ParsedDelete parsedDelete(opCtx, &request);
     uassertStatusOK(parsedDelete.parseRequest());

-    opCtx->checkForInterrupt();
-
     if (MONGO_FAIL_POINT(failAllRemoves)) {
         uasserted(ErrorCodes::InternalError, "failAllRemoves failpoint active!");
     }
diff --git a/src/mongo/db/repair_database.cpp b/src/mongo/db/repair_database.cpp
index 2ff3eb85056..93773d7fdc4 100644
--- a/src/mongo/db/repair_database.cpp
+++ b/src/mongo/db/repair_database.cpp
@@ -269,6 +269,9 @@ Status repairDatabase(OperationContext* opCtx,
     dbHolder().close(opCtx, dbName, "database closed for repair");
     ON_BLOCK_EXIT([&dbName, &opCtx] {
         try {
+            // Ensure that we don't trigger an exception when attempting to take locks.
+            UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
             // Open the db after everything finishes.
             auto db = dbHolder().openDb(opCtx, dbName);
diff --git a/src/mongo/db/repl/noop_writer.cpp b/src/mongo/db/repl/noop_writer.cpp
index 7496baf3179..12af44fb559 100644
--- a/src/mongo/db/repl/noop_writer.cpp
+++ b/src/mongo/db/repl/noop_writer.cpp
@@ -141,6 +141,9 @@ void NoopWriter::stopWritingPeriodicNoops() {
 }

 void NoopWriter::_writeNoop(OperationContext* opCtx) {
+    // Ensure that we don't trigger an exception when attempting to take locks.
+    UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
     // Use GlobalLock + lockMMAPV1Flush instead of DBLock to allow return when the lock is not
     // available. It may happen when the primary steps down and a shared global lock is acquired.
     Lock::GlobalLock lock(opCtx, MODE_IX, Date_t::now() + Milliseconds(1));
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
index 220e2658a35..6eea5422c0a 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
@@ -538,6 +538,9 @@ void MigrationChunkClonerSourceLegacy::_cleanup(OperationContext* opCtx) {
     }

     if (_deleteNotifyExec) {
+        // Don't allow an Interrupt exception to prevent _deleteNotifyExec from getting cleaned up.
+        UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
         AutoGetCollection autoColl(opCtx, _args.getNss(), MODE_IS);
         const auto cursorManager =
             autoColl.getCollection() ? autoColl.getCollection()->getCursorManager() : nullptr;
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index fb67d0e5ff6..01fafcf60ce 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -1143,6 +1143,9 @@ DbResponse receivedGetMore(OperationContext* opCtx,
             getMore(opCtx, ns, ntoreturn, cursorid, &exhaust, &isCursorAuthorized);
         } catch (AssertionException& e) {
             if (isCursorAuthorized) {
+                // Make sure that killCursorGlobal does not throw an exception if it is interrupted.
+                UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+
                 // If a cursor with id 'cursorid' was authorized, it may have been advanced
                 // before an exception terminated processGetMore. Erase the ClientCursor
                 // because it may now be out of sync with the client's iteration state.
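
Note that UninterruptibleLockGuard itself is not defined anywhere in this diff; lockBegin only consults a _uninterruptibleLocksRequested counter on the Locker. A plausible shape for the guard, consistent with that counter but hypothetical beyond what the hunks above show, is a simple RAII wrapper:

    // Hypothetical sketch only: the real class lives outside this diff. The behavior
    // implied by lockBegin() above is an RAII guard that increments a counter on
    // construction and decrements it on destruction; while the counter is nonzero,
    // lockBegin() skips the checkForInterruptNoAssert() path entirely.
    class UninterruptibleLockGuard {
    public:
        explicit UninterruptibleLockGuard(Locker* locker) : _locker(locker) {
            _locker->beginUninterruptibleLockRequest();  // assumed helper name
        }

        ~UninterruptibleLockGuard() {
            _locker->endUninterruptibleLockRequest();  // assumed helper name
        }

        UninterruptibleLockGuard(const UninterruptibleLockGuard&) = delete;
        UninterruptibleLockGuard& operator=(const UninterruptibleLockGuard&) = delete;

    private:
        Locker* const _locker;
    };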