diff options
-rw-r--r-- | etc/backports_required_for_multiversion_tests.yml | 4 | ||||
-rw-r--r-- | jstests/noPassthrough/set_fcv_prepared_transaction.js | 93 | ||||
-rw-r--r-- | jstests/replsets/dbhash_lock_acquisition.js | 9 | ||||
-rw-r--r-- | src/mongo/db/commands/feature_compatibility_version.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/commands/set_feature_compatibility_version_command.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/concurrency/d_concurrency.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/concurrency/d_concurrency.h | 1 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_manager_defs.cpp | 11 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_manager_defs.h | 57 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_state.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_stats.cpp | 18 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_stats.h | 27 | ||||
-rw-r--r-- | src/mongo/db/concurrency/locker.h | 33 | ||||
-rw-r--r-- | src/mongo/db/s/transaction_coordinator_util.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/stats/fill_locker_info.cpp | 21 | ||||
-rw-r--r-- | src/mongo/db/storage/control/journal_flusher.cpp | 22 |
16 files changed, 298 insertions, 55 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 805b4e0ae8f..fc3ad73145e 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -156,6 +156,8 @@ last-continuous: test_file: jstests/replsets/apply_ops_dropDatabase.js - ticket: SERVER-64485 test_file: jstests/sharding/update_with_dollar_fields.js + - ticket: SERVER-65821 + test_file: jstests/replsets/dbhash_lock_acquisition.js # Tests that should only be excluded from particular suites should be listed under that suite. suites: @@ -455,6 +457,8 @@ last-lts: test_file: jstests/replsets/buildindexes_false_commit_quorum.js - ticket: SERVER-64780 test_file: jstest/sharding/resharding_change_stream_namespace_filtering.js + - ticket: SERVER-65821 + test_file: jstests/replsets/dbhash_lock_acquisition.js # Tests that should only be excluded from particular suites should be listed under that suite. suites: diff --git a/jstests/noPassthrough/set_fcv_prepared_transaction.js b/jstests/noPassthrough/set_fcv_prepared_transaction.js new file mode 100644 index 00000000000..86cd3b4837a --- /dev/null +++ b/jstests/noPassthrough/set_fcv_prepared_transaction.js @@ -0,0 +1,93 @@ +/** + * Tests running the setFeatureCompatibilityVersion command concurrently with a prepared + * transaction. Specifically, runs the setFCV right before the TransactionCoordinator writes the + * commit decision. + * + * @tags: [ + * requires_sharding, + * ] + */ +(function() { +"use strict"; + +load("jstests/libs/fail_point_util.js"); + +const st = new ShardingTest({shards: 2}); +const shard0Primary = st.rs0.getPrimary(); + +// Set up a sharded collection with two chunks: +// shard0: [MinKey, 0] +// shard1: [0, MaxKey] +const dbName = "testDb"; +const collName = "testColl"; +const ns = dbName + "." 
+ collName; +assert.commandWorked(st.s.adminCommand({enableSharding: dbName})); +assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {x: 1}})); +assert.commandWorked(st.s.adminCommand({split: ns, middle: {x: 0}})); +assert.commandWorked( + st.s.adminCommand({moveChunk: ns, find: {x: MinKey}, to: st.shard0.shardName})); +assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {x: 1}, to: st.shard1.shardName})); + +function runTxn(mongosHost, dbName, collName) { + const mongosConn = new Mongo(mongosHost); + jsTest.log("Starting a cross-shard transaction with shard0 and shard1 as the participants " + + "and shard0 as the coordinator shard"); + const lsid = {id: UUID()}; + const txnNumber = NumberLong(35); + assert.commandWorked(mongosConn.getDB(dbName).runCommand({ + insert: collName, + documents: [{x: -1}], + lsid, + txnNumber, + startTransaction: true, + autocommit: false, + })); + assert.commandWorked(mongosConn.getDB(dbName).runCommand({ + insert: collName, + documents: [{x: 1}], + lsid, + txnNumber, + autocommit: false, + })); + assert.commandWorked( + mongosConn.adminCommand({commitTransaction: 1, lsid, txnNumber, autocommit: false})); + jsTest.log("Committed the cross-shard transaction"); +} + +function runSetFCV(primaryHost) { + const primaryConn = new Mongo(primaryHost); + jsTest.log("Starting a setFCV command on " + primaryHost); + assert.commandWorked(primaryConn.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV})); + jsTest.log("Completed the setFCV command"); +} + +// Run a cross-shard transaction that has shard0 as the coordinator. Make the TransactionCoordinator +// thread hang right before the commit decision is written (i.e. after the transaction has entered +// the "prepared" state). 
+const writeDecisionFp = configureFailPoint(shard0Primary, "hangBeforeWritingDecision"); +const txnThread = new Thread(runTxn, st.s.host, dbName, collName); +txnThread.start(); +writeDecisionFp.wait(); + +// Run a setFCV command against shard0 and wait for the setFCV thread to start waiting to acquire +// the setFCV S lock (i.e. waiting for existing prepared transactions to commit or abort). +const setFCVThread = new Thread(runSetFCV, shard0Primary.host); +setFCVThread.start(); +assert.soon(() => { + return shard0Primary.getDB(dbName).currentOp().inprog.find( + op => op.command && op.command.setFeatureCompatibilityVersion && op.locks && + op.locks.FeatureCompatibilityVersion === "R" && op.waitingForLock === true); +}); + +// Unpause the TransactionCoordinator. The transaction should be able to commit despite the fact +// that the FCV S lock is enqueued. +writeDecisionFp.off(); + +jsTest.log("Waiting for the cross-shard transaction to commit"); +txnThread.join(); +jsTest.log("Waiting for setFCV command to complete"); +setFCVThread.join(); +jsTest.log("Done"); + +st.stop(); +})(); diff --git a/jstests/replsets/dbhash_lock_acquisition.js b/jstests/replsets/dbhash_lock_acquisition.js index c05a5c1d698..594df5dc8e0 100644 --- a/jstests/replsets/dbhash_lock_acquisition.js +++ b/jstests/replsets/dbhash_lock_acquisition.js @@ -32,8 +32,13 @@ assert.commandWorked(sessionDB.mycoll.insert({})); const ops = db.currentOp({"lsid.id": session.getSessionId().id}).inprog; assert.eq( 1, ops.length, () => "Failed to find session in currentOp() output: " + tojson(db.currentOp())); -assert.eq(ops[0].locks, - {ReplicationStateTransition: "w", Global: "w", Database: "w", Collection: "w"}); +assert.eq(ops[0].locks, { + FeatureCompatibilityVersion: "w", + ReplicationStateTransition: "w", + Global: "w", + Database: "w", + Collection: "w", +}); const threadCaptruncCmd = new Thread(function(host) { try { diff --git a/src/mongo/db/commands/feature_compatibility_version.cpp 
b/src/mongo/db/commands/feature_compatibility_version.cpp index 5adcdffef21..fc0c2ff0fde 100644 --- a/src/mongo/db/commands/feature_compatibility_version.cpp +++ b/src/mongo/db/commands/feature_compatibility_version.cpp @@ -194,7 +194,7 @@ private: * * setFCV takes this lock in exclusive mode when changing the FCV value. */ -Lock::ResourceMutex fcvLock("featureCompatibilityVersionLock"); +Lock::ResourceMutex fcvDocumentLock("featureCompatibilityVersionDocumentLock"); // lastFCVUpdateTimestamp contains the latest oplog entry timestamp which updated the FCV. // It is reset on rollback. Timestamp lastFCVUpdateTimestamp; @@ -506,7 +506,7 @@ void FeatureCompatibilityVersion::fassertInitializedAfterStartup(OperationContex Lock::ExclusiveLock FeatureCompatibilityVersion::enterFCVChangeRegion(OperationContext* opCtx) { invariant(!opCtx->lockState()->isLocked()); - return Lock::ExclusiveLock(opCtx->lockState(), fcvLock); + return Lock::ExclusiveLock(opCtx->lockState(), fcvDocumentLock); } void FeatureCompatibilityVersion::advanceLastFCVUpdateTimestamp(Timestamp fcvUpdateTimestamp) { @@ -571,7 +571,7 @@ FixedFCVRegion::FixedFCVRegion(OperationContext* opCtx) : _lk([&] { invariant(!opCtx->lockState()->isLocked()); invariant(!opCtx->lockState()->isRSTLLocked()); - return Lock::SharedLock(opCtx->lockState(), fcvLock); + return Lock::SharedLock(opCtx->lockState(), fcvDocumentLock); }()) {} FixedFCVRegion::~FixedFCVRegion() = default; diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp index f07008508d8..b205e77b981 100644 --- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp +++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp @@ -478,13 +478,15 @@ private: _cancelTenantMigrations(opCtx); { - // Take the global lock in S mode to create a barrier for operations taking the global - // IX or X locks. 
This ensures that either: + // Take the FCV full transition lock in S mode to create a barrier for operations taking + // the global IX or X locks, which implicitly take the FCV full transition lock in IX + // mode (aside from those which explicitly opt out). This ensures that either: // - The global IX/X locked operation will start after the FCV change, see the // upgrading to the latest FCV and act accordingly. // - The global IX/X locked operation began prior to the FCV change, is acting on that // assumption and will finish before upgrade procedures begin right after this. - Lock::GlobalLock lk(opCtx, MODE_S); + Lock::ResourceLock lk( + opCtx, opCtx->lockState(), resourceIdFeatureCompatibilityVersion, MODE_S); } // (Generic FCV reference): TODO SERVER-60912: When kLastLTS is 6.0, remove this FCV-gated @@ -593,13 +595,15 @@ private: _cancelTenantMigrations(opCtx); { - // Take the global lock in S mode to create a barrier for operations taking the global - // IX or X locks. This ensures that either + // Take the FCV full transition lock in S mode to create a barrier for operations taking + // the global IX or X locks, which implicitly take the FCV full transition lock in IX + // mode (aside from those which explicitly opt out). This ensures that either: // - The global IX/X locked operation will start after the FCV change, see the // downgrading to the last-lts or last-continuous FCV and act accordingly. // - The global IX/X locked operation began prior to the FCV change, is acting on that // assumption and will finish before downgrade procedures begin right after this. 
- Lock::GlobalLock lk(opCtx, MODE_S); + Lock::ResourceLock lk( + opCtx, opCtx->lockState(), resourceIdFeatureCompatibilityVersion, MODE_S); } // (Generic FCV reference): TODO SERVER-60912: When kLastLTS is 6.0, remove this FCV-gated diff --git a/src/mongo/db/concurrency/d_concurrency.cpp b/src/mongo/db/concurrency/d_concurrency.cpp index db11862b65b..d63ac182dc1 100644 --- a/src/mongo/db/concurrency/d_concurrency.cpp +++ b/src/mongo/db/concurrency/d_concurrency.cpp @@ -133,6 +133,7 @@ Lock::GlobalLock::GlobalLock(OperationContext* opCtx, : _opCtx(opCtx), _result(LOCK_INVALID), _pbwm(opCtx->lockState(), resourceIdParallelBatchWriterMode), + _fcvLock(opCtx->lockState(), resourceIdFeatureCompatibilityVersion), _interruptBehavior(behavior), _skipRSTLLock(skipRSTLLock), _isOutermostLock(!opCtx->lockState()->isLocked()) { @@ -148,6 +149,17 @@ Lock::GlobalLock::GlobalLock(OperationContext* opCtx, } }); + if (_opCtx->lockState()->shouldConflictWithSetFeatureCompatibilityVersion() && + !isSharedLockMode(lockMode)) { + _fcvLock.lock(_opCtx, MODE_IX, deadline); + } + ScopeGuard unlockFCVLock([this, lockMode] { + if (_opCtx->lockState()->shouldConflictWithSetFeatureCompatibilityVersion() && + !isSharedLockMode(lockMode)) { + _fcvLock.unlock(); + } + }); + _result = LOCK_INVALID; if (skipRSTLLock) { _takeGlobalLockOnly(lockMode, deadline); @@ -156,6 +168,7 @@ Lock::GlobalLock::GlobalLock(OperationContext* opCtx, } _result = LOCK_OK; + unlockFCVLock.dismiss(); unlockPBWM.dismiss(); } catch (const ExceptionForCat<ErrorCategory::Interruption>&) { // The kLeaveUnlocked behavior suppresses this exception. 
@@ -184,6 +197,7 @@ Lock::GlobalLock::GlobalLock(GlobalLock&& otherLock) : _opCtx(otherLock._opCtx), _result(otherLock._result), _pbwm(std::move(otherLock._pbwm)), + _fcvLock(std::move(otherLock._fcvLock)), _interruptBehavior(otherLock._interruptBehavior), _skipRSTLLock(otherLock._skipRSTLLock), _isOutermostLock(otherLock._isOutermostLock) { diff --git a/src/mongo/db/concurrency/d_concurrency.h b/src/mongo/db/concurrency/d_concurrency.h index 1c21e9362af..4e3433d5f6d 100644 --- a/src/mongo/db/concurrency/d_concurrency.h +++ b/src/mongo/db/concurrency/d_concurrency.h @@ -252,6 +252,7 @@ public: OperationContext* const _opCtx; LockResult _result; ResourceLock _pbwm; + ResourceLock _fcvLock; InterruptBehavior _interruptBehavior; bool _skipRSTLLock; const bool _isOutermostLock; diff --git a/src/mongo/db/concurrency/lock_manager_defs.cpp b/src/mongo/db/concurrency/lock_manager_defs.cpp index 01ae4586cad..bc9c357cc01 100644 --- a/src/mongo/db/concurrency/lock_manager_defs.cpp +++ b/src/mongo/db/concurrency/lock_manager_defs.cpp @@ -35,8 +35,13 @@ namespace mongo { const ResourceId resourceIdLocalDB = ResourceId(RESOURCE_DATABASE, StringData("local")); const ResourceId resourceIdOplog = ResourceId(RESOURCE_COLLECTION, StringData("local.oplog.rs")); const ResourceId resourceIdAdminDB = ResourceId(RESOURCE_DATABASE, StringData("admin")); -const ResourceId resourceIdGlobal = ResourceId(RESOURCE_GLOBAL, 1ULL); -const ResourceId resourceIdParallelBatchWriterMode = ResourceId(RESOURCE_PBWM, 1ULL); -const ResourceId resourceIdReplicationStateTransitionLock = ResourceId(RESOURCE_RSTL, 1ULL); +const ResourceId resourceIdGlobal = + ResourceId(RESOURCE_GLOBAL, static_cast<uint8_t>(ResourceGlobalId::kGlobal)); +const ResourceId resourceIdParallelBatchWriterMode = + ResourceId(RESOURCE_GLOBAL, static_cast<uint8_t>(ResourceGlobalId::kParallelBatchWriterMode)); +const ResourceId resourceIdFeatureCompatibilityVersion = ResourceId( + RESOURCE_GLOBAL, 
static_cast<uint8_t>(ResourceGlobalId::kFeatureCompatibilityVersion)); +const ResourceId resourceIdReplicationStateTransitionLock = ResourceId( + RESOURCE_GLOBAL, static_cast<uint8_t>(ResourceGlobalId::kReplicationStateTransitionLock)); } // namespace mongo diff --git a/src/mongo/db/concurrency/lock_manager_defs.h b/src/mongo/db/concurrency/lock_manager_defs.h index 857599e8cb8..d2fe7a1df62 100644 --- a/src/mongo/db/concurrency/lock_manager_defs.h +++ b/src/mongo/db/concurrency/lock_manager_defs.h @@ -155,12 +155,6 @@ enum LockResult { enum ResourceType { RESOURCE_INVALID = 0, - /** Parallel batch writer mode lock */ - RESOURCE_PBWM, - - /** Replication state transition lock. */ - RESOURCE_RSTL, - /** Used for global exclusive operations */ RESOURCE_GLOBAL, @@ -177,21 +171,42 @@ enum ResourceType { }; /** + * IDs for usages of RESOURCE_GLOBAL. + */ +enum class ResourceGlobalId : uint8_t { + kParallelBatchWriterMode, + kFeatureCompatibilityVersion, + kReplicationStateTransitionLock, + kGlobal, + + // The number of global resource ids. Always insert new ids above this entry. + kNumIds +}; + +/** * Maps the resource id to a human-readable string. */ -static const char* ResourceTypeNames[] = {"Invalid", - "ParallelBatchWriterMode", - "ReplicationStateTransition", - "Global", - "Database", - "Collection", - "Metadata", - "Mutex"}; +static const char* ResourceTypeNames[] = { + "Invalid", "Global", "Database", "Collection", "Metadata", "Mutex"}; + +/** + * Maps the global resource id to a human-readable string. + */ +static const char* ResourceGlobalIdNames[] = { + "ParallelBatchWriterMode", + "FeatureCompatibilityVersion", + "ReplicationStateTransition", + "Global", +}; // Ensure we do not add new types without updating the names array. MONGO_STATIC_ASSERT((sizeof(ResourceTypeNames) / sizeof(ResourceTypeNames[0])) == ResourceTypesCount); +// Ensure we do not add new global resource ids without updating the names array. 
+MONGO_STATIC_ASSERT((sizeof(ResourceGlobalIdNames) / sizeof(ResourceGlobalIdNames[0])) == + static_cast<uint8_t>(ResourceGlobalId::kNumIds)); + /** * Returns a human-readable name for the specified resource type. */ @@ -200,6 +215,13 @@ static const char* resourceTypeName(ResourceType resourceType) { } /** + * Returns a human-readable name for the specified global resource. + */ +static const char* resourceGlobalIdName(ResourceGlobalId id) { + return ResourceGlobalIdNames[static_cast<uint8_t>(id)]; +} + +/** * Uniquely identifies a lockable resource. */ class ResourceId { @@ -291,9 +313,14 @@ extern const ResourceId resourceIdGlobal; // this lock. extern const ResourceId resourceIdParallelBatchWriterMode; +// Hardcoded resource id for a full FCV transition from start -> upgrading -> upgraded (or +// equivalent for downgrading). This lock is used as a barrier to prevent writes from spanning an +// FCV change. This lock is acquired after the PBWM but before the RSTL and resourceIdGlobal. +extern const ResourceId resourceIdFeatureCompatibilityVersion; + // Hardcoded resource id for the ReplicationStateTransitionLock (RSTL). This lock is acquired in // mode X for any replication state transition and is acquired by all other reads and writes in mode -// IX. This lock is acquired after the PBWM but before the resourceIdGlobal. +// IX. This lock is acquired after the PBWM and FCV locks but before the resourceIdGlobal. 
extern const ResourceId resourceIdReplicationStateTransitionLock; /** diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp index 33567b900b8..fa7dd845bb4 100644 --- a/src/mongo/db/concurrency/lock_state.cpp +++ b/src/mongo/db/concurrency/lock_state.cpp @@ -144,8 +144,6 @@ bool LockerImpl::_shouldDelayUnlock(ResourceId resId, LockMode mode) const { return false; case RESOURCE_GLOBAL: - case RESOURCE_PBWM: - case RESOURCE_RSTL: case RESOURCE_DATABASE: case RESOURCE_COLLECTION: case RESOURCE_METADATA: @@ -437,8 +435,7 @@ bool LockerImpl::unlockGlobal() { // error for any lock used with multi-granularity locking to have more references than // the global lock, because every scope starts by calling lockGlobal. const auto resType = it.key().getType(); - if (resType == RESOURCE_GLOBAL || resType == RESOURCE_PBWM || resType == RESOURCE_RSTL || - resType == RESOURCE_MUTEX) { + if (resType == RESOURCE_GLOBAL || resType == RESOURCE_MUTEX) { it.next(); } else { invariant(_unlockImpl(&it)); @@ -789,8 +786,9 @@ bool LockerImpl::saveLockStateAndUnlock(Locker::LockSnapshot* stateOut) { // We should never have to save and restore metadata locks. invariant(RESOURCE_DATABASE == resType || RESOURCE_COLLECTION == resType || - (RESOURCE_PBWM == resType && isSharedLockMode(it->mode)) || - (RESOURCE_RSTL == resType && it->mode == MODE_IX)); + (resId == resourceIdParallelBatchWriterMode && isSharedLockMode(it->mode)) || + resId == resourceIdFeatureCompatibilityVersion || + (resId == resourceIdReplicationStateTransitionLock && it->mode == MODE_IX)); // And, stuff the info into the out parameter. OneLock info; @@ -890,7 +888,7 @@ LockResult LockerImpl::_lockBegin(OperationContext* opCtx, ResourceId resId, Loc // Give priority to the full modes for Global, PBWM, and RSTL resources so we don't stall global // operations such as shutdown or stepdown. 
const ResourceType resType = resId.getType(); - if (resType == RESOURCE_GLOBAL || resType == RESOURCE_PBWM || resType == RESOURCE_RSTL) { + if (resType == RESOURCE_GLOBAL) { if (mode == MODE_S || mode == MODE_X) { request->enqueueAtFront = true; request->compatibleFirst = true; diff --git a/src/mongo/db/concurrency/lock_stats.cpp b/src/mongo/db/concurrency/lock_stats.cpp index f724c90a240..980b4fd0a5e 100644 --- a/src/mongo/db/concurrency/lock_stats.cpp +++ b/src/mongo/db/concurrency/lock_stats.cpp @@ -42,9 +42,15 @@ LockStats<CounterType>::LockStats() { template <typename CounterType> void LockStats<CounterType>::report(BSONObjBuilder* builder) const { - // All indexing below starts from offset 1, because we do not want to report/account - // position 0, which is a sentinel value for invalid resource/no lock. - for (int i = 1; i < ResourceTypesCount; i++) { + for (uint8_t i = 0; i < static_cast<uint8_t>(ResourceGlobalId::kNumIds); ++i) { + _report(builder, + resourceGlobalIdName(static_cast<ResourceGlobalId>(i)), + _resourceGlobalStats[i]); + } + + // Index starting from offset 2 because position 0 is a sentinel value for invalid resource/no + // lock, and position 1 is the global resource which was already reported above. 
+ for (int i = 2; i < ResourceTypesCount; i++) { _report(builder, resourceTypeName(static_cast<ResourceType>(i)), _stats[i]); } @@ -119,6 +125,12 @@ void LockStats<CounterType>::_report(BSONObjBuilder* builder, template <typename CounterType> void LockStats<CounterType>::reset() { + for (uint8_t i = 0; i < static_cast<uint8_t>(ResourceGlobalId::kNumIds); ++i) { + for (uint8_t mode = 0; mode < LockModesCount; ++mode) { + _resourceGlobalStats[i].modeStats[mode].reset(); + } + } + for (int i = 0; i < ResourceTypesCount; i++) { for (int mode = 0; mode < LockModesCount; mode++) { _stats[i].modeStats[mode].reset(); diff --git a/src/mongo/db/concurrency/lock_stats.h b/src/mongo/db/concurrency/lock_stats.h index d45b3b38864..69855480393 100644 --- a/src/mongo/db/concurrency/lock_stats.h +++ b/src/mongo/db/concurrency/lock_stats.h @@ -141,6 +141,10 @@ public: return _oplogStats.modeStats[mode]; } + if (resId.getType() == RESOURCE_GLOBAL) { + return _resourceGlobalStats[resId.getHashId()].modeStats[mode]; + } + return _stats[resId.getType()].modeStats[mode]; } @@ -148,7 +152,15 @@ public: void append(const LockStats<OtherType>& other) { typedef LockStatCounters<OtherType> OtherLockStatCountersType; - // Append all lock stats + // Append global lock stats. + for (uint8_t i = 0; i < static_cast<uint8_t>(ResourceGlobalId::kNumIds); ++i) { + for (uint8_t mode = 0; mode < LockModesCount; ++mode) { + _resourceGlobalStats[i].modeStats[mode].append( + other._resourceGlobalStats[i].modeStats[mode]); + } + } + + // Append all non-global, non-oplog lock stats. 
for (int i = 0; i < ResourceTypesCount; i++) { for (int mode = 0; mode < LockModesCount; mode++) { const OtherLockStatCountersType& otherStats = other._stats[i].modeStats[mode]; @@ -169,6 +181,13 @@ public: void subtract(const LockStats<OtherType>& other) { typedef LockStatCounters<OtherType> OtherLockStatCountersType; + for (uint8_t i = 0; i < static_cast<uint8_t>(ResourceGlobalId::kNumIds); ++i) { + for (uint8_t mode = 0; mode < LockModesCount; ++mode) { + _resourceGlobalStats[i].modeStats[mode].subtract( + other._resourceGlobalStats[i].modeStats[mode]); + } + } + for (int i = 0; i < ResourceTypesCount; i++) { for (int mode = 0; mode < LockModesCount; mode++) { const OtherLockStatCountersType& otherStats = other._stats[i].modeStats[mode]; @@ -206,8 +225,10 @@ private: const PerModeLockStatCounters& stat) const; - // Split the lock stats per resource type. Special-case the oplog so we can collect more - // detailed stats for it. + // For the global resource, split the lock stats per ID since each one should be reported + // separately. For the remaining resources, split the lock stats per resource type. Special-case + // the oplog so we can collect more detailed stats for it. + PerModeLockStatCounters _resourceGlobalStats[static_cast<uint8_t>(ResourceGlobalId::kNumIds)]; PerModeLockStatCounters _stats[ResourceTypesCount]; PerModeLockStatCounters _oplogStats; }; diff --git a/src/mongo/db/concurrency/locker.h b/src/mongo/db/concurrency/locker.h index 37b6dbeedb8..5945ab1857b 100644 --- a/src/mongo/db/concurrency/locker.h +++ b/src/mongo/db/concurrency/locker.h @@ -494,6 +494,18 @@ public: } /** + * If set to false, this opts out of conflicting with the barrier created by the + * setFeatureCompatibilityVersion command. Code that opts-out must be ok with writes being able + * to start under one FCV and complete under a different FCV. 
+ */ + void setShouldConflictWithSetFeatureCompatibilityVersion(bool newValue) { + _shouldConflictWithSetFeatureCompatibilityVersion = newValue; + } + bool shouldConflictWithSetFeatureCompatibilityVersion() const { + return _shouldConflictWithSetFeatureCompatibilityVersion; + } + + /** * If set to true, this opts out of a fatal assertion where operations which are holding open an * oplog hole cannot try to acquire subsequent locks. */ @@ -570,6 +582,7 @@ protected: private: bool _shouldConflictWithSecondaryBatchApplication = true; + bool _shouldConflictWithSetFeatureCompatibilityVersion = true; bool _shouldAllowLockAcquisitionOnTimestampedUnitOfWork = false; bool _shouldAcquireTicket = true; std::string _debugInfo; // Extra info about this locker for debugging purpose @@ -634,6 +647,26 @@ private: }; /** + * RAII-style class to opt out of the FeatureCompatibilityVersion lock. + */ +class ShouldNotConflictWithSetFeatureCompatibilityVersionBlock { +public: + explicit ShouldNotConflictWithSetFeatureCompatibilityVersionBlock(Locker* lockState) + : _lockState(lockState), + _originalShouldConflict(_lockState->shouldConflictWithSetFeatureCompatibilityVersion()) { + _lockState->setShouldConflictWithSetFeatureCompatibilityVersion(false); + } + + ~ShouldNotConflictWithSetFeatureCompatibilityVersionBlock() { + _lockState->setShouldConflictWithSetFeatureCompatibilityVersion(_originalShouldConflict); + } + +private: + Locker* const _lockState; + const bool _originalShouldConflict; +}; + +/** * RAII-style class to opt out of a fatal assertion where operations that set a timestamp on a * WriteUnitOfWork cannot try to acquire subsequent locks. When an operation is writing at a * specific timestamp, it creates an oplog hole at that timestamp. 
The oplog visibility rules only diff --git a/src/mongo/db/s/transaction_coordinator_util.cpp b/src/mongo/db/s/transaction_coordinator_util.cpp index 6c71a3071cb..776ead96215 100644 --- a/src/mongo/db/s/transaction_coordinator_util.cpp +++ b/src/mongo/db/s/transaction_coordinator_util.cpp @@ -351,6 +351,15 @@ repl::OpTime persistDecisionBlocking(OperationContext* opCtx, sessionInfo.setTxnRetryCounter(*txnNumberAndRetryCounter.getTxnRetryCounter()); } + // The transaction participant is already holding a global IX lock (and therefore an FCV IX + // lock) when we get to this point. Since the setFCV command takes an FCV S lock, we can hit a + // deadlock if the setFCV enqueues its lock after the transaction participant has already + // acquired its lock, but before we (the transaction coordinator) acquire ours. To remedy this, + // we choose to bypass this barrier that setFCV creates. This is safe because the setFCV command + // drains any outstanding cross-shard transactions before completing an FCV upgrade/downgrade. + // It does so by waiting for the participant portion of the cross-shard transaction to have + // released its FCV IX lock. + ShouldNotConflictWithSetFeatureCompatibilityVersionBlock noFCVLock{opCtx->lockState()}; DBDirectClient client(opCtx); // Throws if serializing the request or deserializing the response fails. 
diff --git a/src/mongo/db/stats/fill_locker_info.cpp b/src/mongo/db/stats/fill_locker_info.cpp index f78b3a991e4..1f73d4dcc72 100644 --- a/src/mongo/db/stats/fill_locker_info.cpp +++ b/src/mongo/db/stats/fill_locker_info.cpp @@ -41,12 +41,17 @@ void fillLockerInfo(const Locker::LockerInfo& lockerInfo, BSONObjBuilder& infoBu BSONObjBuilder locks(infoBuilder.subobjStart("locks")); const size_t locksSize = lockerInfo.locks.size(); - // Only add the last lock of each type, and use the largest mode encountered - LockMode modeForType[ResourceTypesCount] = {}; // default initialize to zero (min value) + // Only add the last lock of each type, and use the largest mode encountered. Each type of + // global resource is reported as its own type. + LockMode modeForType[static_cast<uint8_t>(ResourceGlobalId::kNumIds) + ResourceTypesCount - 1] = + {}; // default initialize to zero (min value) for (size_t i = 0; i < locksSize; i++) { const Locker::OneLock& lock = lockerInfo.locks[i]; const ResourceType lockType = lock.resourceId.getType(); - const LockMode lockMode = std::max(lock.mode, modeForType[lockType]); + auto index = lockType == RESOURCE_GLOBAL + ? 
lock.resourceId.getHashId() + : static_cast<uint8_t>(ResourceGlobalId::kNumIds) + lockType - 1; + const LockMode lockMode = std::max(lock.mode, modeForType[index]); // Check that lockerInfo is sorted on resource type invariant(i == 0 || lockType >= lockerInfo.locks[i - 1].resourceId.getType()); @@ -56,10 +61,16 @@ void fillLockerInfo(const Locker::LockerInfo& lockerInfo, BSONObjBuilder& infoBu continue; } - modeForType[lockType] = lockMode; + modeForType[index] = lockMode; - if (i + 1 < locksSize && lockerInfo.locks[i + 1].resourceId.getType() == lockType) { + if (i + 1 < locksSize && lockerInfo.locks[i + 1].resourceId.getType() == lockType && + (lockType != RESOURCE_GLOBAL || + lock.resourceId.getHashId() == lockerInfo.locks[i + 1].resourceId.getHashId())) { continue; // skip this lock as it is not the last one of its type + } else if (lockType == RESOURCE_GLOBAL) { + locks.append( + resourceGlobalIdName(static_cast<ResourceGlobalId>(lock.resourceId.getHashId())), + legacyModeName(lockMode)); } else { locks.append(resourceTypeName(lockType), legacyModeName(lockMode)); } diff --git a/src/mongo/db/storage/control/journal_flusher.cpp b/src/mongo/db/storage/control/journal_flusher.cpp index 212f51cd88f..765d826cda8 100644 --- a/src/mongo/db/storage/control/journal_flusher.cpp +++ b/src/mongo/db/storage/control/journal_flusher.cpp @@ -87,13 +87,20 @@ void JournalFlusher::run() { [&] { return _flushJournalNow || _needToPause || _shuttingDown; }); } - // Initialize the thread's opCtx. - _uniqueCtx.emplace(tc->makeOperationContext()); + auto setUpOpCtx = [&] { + // Initialize the thread's opCtx. + _uniqueCtx.emplace(tc->makeOperationContext()); - // Updates to a non-replicated collection, oplogTruncateAfterPoint, are made by this thread. - // Non-replicated writes will not contribute to replication lag and can be safely excluded - // from Flow Control. 
- _uniqueCtx->get()->setShouldParticipateInFlowControl(false); + // Updates to a non-replicated collection, oplogTruncateAfterPoint, are made by this thread. + // Non-replicated writes will not contribute to replication lag and can be safely excluded + // from Flow Control. + _uniqueCtx->get()->setShouldParticipateInFlowControl(false); + + // The journal flusher should not conflict with the setFCV command. + _uniqueCtx->get()->lockState()->setShouldConflictWithSetFeatureCompatibilityVersion(false); + }; + + setUpOpCtx(); while (true) { pauseJournalFlusherBeforeFlush.pauseWhileSet(); try { @@ -107,8 +114,7 @@ void JournalFlusher::run() { // the time during or before the next flush. stdx::lock_guard<Latch> lk(_opCtxMutex); _uniqueCtx.reset(); - _uniqueCtx.emplace(tc->makeOperationContext()); - _uniqueCtx->get()->setShouldParticipateInFlowControl(false); + setUpOpCtx(); }); _uniqueCtx->get()->recoveryUnit()->waitUntilDurable(_uniqueCtx->get()); |