From b5ca922655a1d0a438c39c38b2c4434016069cb7 Mon Sep 17 00:00:00 2001 From: Xiangyu Yao Date: Tue, 4 Sep 2018 15:42:43 -0400 Subject: SERVER-26854 LockStats for sub-operations should not include wait time for previous operations --- src/mongo/db/concurrency/lock_state.cpp | 15 +++++++-- src/mongo/db/concurrency/lock_state.h | 6 ++-- src/mongo/db/concurrency/lock_state_test.cpp | 8 ++--- src/mongo/db/concurrency/lock_stats.h | 27 +++++++++++++++ src/mongo/db/concurrency/lock_stats_test.cpp | 38 ++++++++++++++++++++++ src/mongo/db/concurrency/locker.h | 12 +++++-- src/mongo/db/concurrency/locker_noop.h | 6 ++-- src/mongo/db/curop.cpp | 9 +++-- src/mongo/db/curop.h | 6 ++++ src/mongo/db/introspect.cpp | 2 +- src/mongo/db/pipeline/mongod_process_interface.cpp | 3 +- src/mongo/db/transaction_participant.cpp | 13 ++++---- src/mongo/db/transaction_participant_test.cpp | 15 +++++---- 13 files changed, 130 insertions(+), 30 deletions(-) (limited to 'src/mongo/db') diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp index 47fd8a408b8..e526931d785 100644 --- a/src/mongo/db/concurrency/lock_state.cpp +++ b/src/mongo/db/concurrency/lock_state.cpp @@ -538,7 +538,8 @@ ResourceId LockerImpl::getWaitingResource() const { return ResourceId(); } -void LockerImpl::getLockerInfo(LockerInfo* lockerInfo) const { +void LockerImpl::getLockerInfo(LockerInfo* lockerInfo, + const boost::optional lockStatsBase) const { invariant(lockerInfo); // Zero-out the contents @@ -562,11 +563,19 @@ void LockerImpl::getLockerInfo(LockerInfo* lockerInfo) const { lockerInfo->waitingResource = getWaitingResource(); lockerInfo->stats.append(_stats); + + // lockStatsBase is a snapshot of lock stats taken when the sub-operation starts. Only + // sub-operations have lockStatsBase. + if (lockStatsBase) + // Adjust the lock stats by subtracting the lockStatsBase. No mutex is needed because + // lockStatsBase is immutable. + lockerInfo->stats.subtract(*lockStatsBase); } -boost::optional LockerImpl::getLockerInfo() const { +boost::optional LockerImpl::getLockerInfo( + const boost::optional lockStatsBase) const { Locker::LockerInfo lockerInfo; - getLockerInfo(&lockerInfo); + getLockerInfo(&lockerInfo, lockStatsBase); return std::move(lockerInfo); } diff --git a/src/mongo/db/concurrency/lock_state.h b/src/mongo/db/concurrency/lock_state.h index 119d7f26da6..daf86c3eb5e 100644 --- a/src/mongo/db/concurrency/lock_state.h +++ b/src/mongo/db/concurrency/lock_state.h @@ -181,8 +181,10 @@ public: virtual ResourceId getWaitingResource() const; - virtual void getLockerInfo(LockerInfo* lockerInfo) const; - virtual boost::optional getLockerInfo() const final; + virtual void getLockerInfo(LockerInfo* lockerInfo, + const boost::optional lockStatsBase) const; + virtual boost::optional getLockerInfo( + const boost::optional lockStatsBase) const final; virtual bool saveLockStateAndUnlock(LockSnapshot* stateOut); diff --git a/src/mongo/db/concurrency/lock_state_test.cpp b/src/mongo/db/concurrency/lock_state_test.cpp index 0bd74aeb209..4d2da27bd94 100644 --- a/src/mongo/db/concurrency/lock_state_test.cpp +++ b/src/mongo/db/concurrency/lock_state_test.cpp @@ -225,7 +225,7 @@ TEST(LockerImpl, DefaultLocker) { // Make sure the flush lock IS NOT held Locker::LockerInfo info; - locker.getLockerInfo(&info); + locker.getLockerInfo(&info, boost::none); ASSERT(!info.waitingResource.isValid()); ASSERT_EQUALS(2U, info.locks.size()); ASSERT_EQUALS(RESOURCE_GLOBAL, info.locks[0].resourceId.getType()); @@ -479,7 +479,7 @@ TEST(LockerImpl, GetLockerInfoShouldReportHeldLocks) { // Assert it shows up in the output of getLockerInfo(). Locker::LockerInfo lockerInfo; - locker.getLockerInfo(&lockerInfo); + locker.getLockerInfo(&lockerInfo, boost::none); ASSERT(lockerInfoContainsLock(lockerInfo, globalId, MODE_IX)); ASSERT(lockerInfoContainsLock(lockerInfo, dbId, MODE_IX)); @@ -510,7 +510,7 @@ TEST(LockerImpl, GetLockerInfoShouldReportPendingLocks) { // Assert the held locks show up in the output of getLockerInfo(). Locker::LockerInfo lockerInfo; - conflictingLocker.getLockerInfo(&lockerInfo); + conflictingLocker.getLockerInfo(&lockerInfo, boost::none); ASSERT(lockerInfoContainsLock(lockerInfo, globalId, MODE_IS)); ASSERT(lockerInfoContainsLock(lockerInfo, dbId, MODE_IS)); ASSERT(lockerInfoContainsLock(lockerInfo, collectionId, MODE_IS)); @@ -528,7 +528,7 @@ TEST(LockerImpl, GetLockerInfoShouldReportPendingLocks) { ASSERT_EQ(LOCK_OK, conflictingLocker.lockComplete(collectionId, MODE_IS, Date_t::now(), checkDeadlock)); - conflictingLocker.getLockerInfo(&lockerInfo); + conflictingLocker.getLockerInfo(&lockerInfo, boost::none); ASSERT_FALSE(lockerInfo.waitingResource.isValid()); ASSERT(conflictingLocker.unlock(collectionId)); diff --git a/src/mongo/db/concurrency/lock_stats.h b/src/mongo/db/concurrency/lock_stats.h index 86f4a26d273..a4bf083bc0e 100644 --- a/src/mongo/db/concurrency/lock_stats.h +++ b/src/mongo/db/concurrency/lock_stats.h @@ -83,6 +83,14 @@ struct LockStatCounters { CounterOps::add(numDeadlocks, other.numDeadlocks); } + template + void subtract(const LockStatCounters& other) { + CounterOps::add(numAcquisitions, -other.numAcquisitions); + CounterOps::add(numWaits, -other.numWaits); + CounterOps::add(combinedWaitTimeMicros, -other.combinedWaitTimeMicros); + CounterOps::add(numDeadlocks, -other.numDeadlocks); + } + void reset() { CounterOps::set(numAcquisitions, 0); CounterOps::set(numWaits, 0); @@ -158,6 +166,25 @@ public: } } + template + void subtract(const LockStats& other) { + typedef LockStatCounters OtherLockStatCountersType; + + for (int i = 0; i < ResourceTypesCount; i++) { + for (int mode = 0; mode < LockModesCount; mode++) { + const OtherLockStatCountersType& otherStats = other._stats[i].modeStats[mode]; + LockStatCountersType& thisStats = _stats[i].modeStats[mode]; + thisStats.subtract(otherStats); + } + } + + for (int mode = 0; mode < LockModesCount; mode++) { + const OtherLockStatCountersType& otherStats = other._oplogStats.modeStats[mode]; + LockStatCountersType& thisStats = _oplogStats.modeStats[mode]; + thisStats.subtract(otherStats); + } + } + void report(BSONObjBuilder* builder) const; void reset(); diff --git a/src/mongo/db/concurrency/lock_stats_test.cpp b/src/mongo/db/concurrency/lock_stats_test.cpp index 53237a5e0f0..726aa47ee8e 100644 --- a/src/mongo/db/concurrency/lock_stats_test.cpp +++ b/src/mongo/db/concurrency/lock_stats_test.cpp @@ -101,4 +101,42 @@ TEST(LockStats, Reporting) { stats.report(&builder); } +TEST(LockStats, Subtraction) { + const ResourceId resId(RESOURCE_COLLECTION, std::string("LockStats.Subtraction")); + + resetGlobalLockStats(); + + LockerForTests locker(MODE_IX); + locker.lock(resId, MODE_X); + + { + LockerForTests lockerConflict(MODE_IX); + ASSERT_EQUALS(LOCK_TIMEOUT, + lockerConflict.lock(resId, MODE_S, Date_t::now() + Milliseconds(1))); + } + + SingleThreadedLockStats stats; + reportGlobalLockingStats(&stats); + ASSERT_EQUALS(1, stats.get(resId, MODE_S).numAcquisitions); + ASSERT_EQUALS(1, stats.get(resId, MODE_S).numWaits); + ASSERT_GREATER_THAN(stats.get(resId, MODE_S).combinedWaitTimeMicros, 0); + + { + LockerForTests lockerConflict(MODE_IX); + ASSERT_EQUALS(LOCK_TIMEOUT, + lockerConflict.lock(resId, MODE_S, Date_t::now() + Milliseconds(1))); + } + + SingleThreadedLockStats stats2; + reportGlobalLockingStats(&stats2); + ASSERT_EQUALS(2, stats2.get(resId, MODE_S).numAcquisitions); + ASSERT_EQUALS(2, stats2.get(resId, MODE_S).numWaits); + ASSERT_GREATER_THAN(stats2.get(resId, MODE_S).combinedWaitTimeMicros, 0); + + stats2.subtract(stats); + ASSERT_EQUALS(1, stats2.get(resId, MODE_S).numAcquisitions); + ASSERT_EQUALS(1, stats2.get(resId, MODE_S).numWaits); + ASSERT_GREATER_THAN(stats2.get(resId, MODE_S).combinedWaitTimeMicros, 0); +} + } // namespace mongo diff --git a/src/mongo/db/concurrency/locker.h b/src/mongo/db/concurrency/locker.h index 9523bc46e92..29b0ca9d364 100644 --- a/src/mongo/db/concurrency/locker.h +++ b/src/mongo/db/concurrency/locker.h @@ -310,13 +310,21 @@ public: SingleThreadedLockStats stats; }; - virtual void getLockerInfo(LockerInfo* lockerInfo) const = 0; + /** + * lockStatsBase is the snapshot of the lock stats taken at the point when the operation starts. + * The precise lock stats of a sub-operation would be the stats from the locker info minus the + * lockStatsBase. + */ + virtual void getLockerInfo( + LockerInfo* lockerInfo, + const boost::optional lockStatsBase) const = 0; /** * Returns boost::none if this is an instance of LockerNoop, or a populated LockerInfo * otherwise. */ - virtual boost::optional getLockerInfo() const = 0; + virtual boost::optional getLockerInfo( + const boost::optional lockStatsBase) const = 0; /** * LockSnapshot captures the state of all resources that are locked, what modes they're diff --git a/src/mongo/db/concurrency/locker_noop.h b/src/mongo/db/concurrency/locker_noop.h index 6b9ac5227f5..232b0b5f685 100644 --- a/src/mongo/db/concurrency/locker_noop.h +++ b/src/mongo/db/concurrency/locker_noop.h @@ -157,11 +157,13 @@ public: MONGO_UNREACHABLE; } - virtual void getLockerInfo(LockerInfo* lockerInfo) const { + virtual void getLockerInfo(LockerInfo* lockerInfo, + boost::optional lockStatsBase) const { MONGO_UNREACHABLE; } - virtual boost::optional getLockerInfo() const { + virtual boost::optional getLockerInfo( + boost::optional lockStatsBase) const { return boost::none; } diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp index 72c8b426968..dbe90b901f4 100644 --- a/src/mongo/db/curop.cpp +++ b/src/mongo/db/curop.cpp @@ -264,7 +264,12 @@ void CurOp::reportCurrentOpForClient(OperationContext* opCtx, } } -CurOp::CurOp(OperationContext* opCtx) : CurOp(opCtx, &_curopStack(opCtx)) {} +CurOp::CurOp(OperationContext* opCtx) : CurOp(opCtx, &_curopStack(opCtx)) { + // If this is a sub-operation, we store the snapshot of lock stats as the base lock stats of the + // current operation. + if (_parent != nullptr) + _lockStatsBase = opCtx->lockState()->getLockerInfo(boost::none)->stats; +} CurOp::CurOp(OperationContext* opCtx, CurOpStack* stack) : _stack(stack) { if (opCtx) { @@ -364,7 +369,7 @@ bool CurOp::completeAndLogOperation(OperationContext* opCtx, client->getPrng().nextCanonicalDouble() < serverGlobalParams.sampleRate; if (shouldLogOp || (shouldSample && _debug.executionTimeMicros > slowMs * 1000LL)) { - const auto lockerInfo = opCtx->lockState()->getLockerInfo(); + auto lockerInfo = opCtx->lockState()->getLockerInfo(_lockStatsBase); log(component) << _debug.report(client, *this, (lockerInfo ? &lockerInfo->stats : nullptr)); } diff --git a/src/mongo/db/curop.h b/src/mongo/db/curop.h index a87be82213b..1c039d8f06c 100644 --- a/src/mongo/db/curop.h +++ b/src/mongo/db/curop.h @@ -558,6 +558,10 @@ public: _planSummary = std::move(summary); } + const boost::optional getLockStatsBase() { + return _lockStatsBase; + } + private: class CurOpStack; @@ -600,6 +604,8 @@ private: int _numYields{0}; std::string _planSummary; + boost::optional + _lockStatsBase; // This is the snapshot of lock stats taken when curOp is constructed. }; /** diff --git a/src/mongo/db/introspect.cpp b/src/mongo/db/introspect.cpp index 493f80ccc4f..778aa4fb3f3 100644 --- a/src/mongo/db/introspect.cpp +++ b/src/mongo/db/introspect.cpp @@ -90,7 +90,7 @@ void profile(OperationContext* opCtx, NetworkOp op) { { Locker::LockerInfo lockerInfo; - opCtx->lockState()->getLockerInfo(&lockerInfo); + opCtx->lockState()->getLockerInfo(&lockerInfo, CurOp::get(opCtx)->getLockStatsBase()); CurOp::get(opCtx)->debug().append(*CurOp::get(opCtx), lockerInfo.stats, b); } diff --git a/src/mongo/db/pipeline/mongod_process_interface.cpp b/src/mongo/db/pipeline/mongod_process_interface.cpp index 14eb073ef6e..cb6f297946e 100644 --- a/src/mongo/db/pipeline/mongod_process_interface.cpp +++ b/src/mongo/db/pipeline/mongod_process_interface.cpp @@ -522,7 +522,8 @@ BSONObj MongoDInterface::_reportCurrentOpForClient(OperationContext* opCtx, } // Append lock stats before returning. - if (auto lockerInfo = clientOpCtx->lockState()->getLockerInfo()) { + if (auto lockerInfo = clientOpCtx->lockState()->getLockerInfo( + CurOp::get(*clientOpCtx)->getLockStatsBase())) { fillLockerInfo(*lockerInfo, builder); } } diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index 8967a9c90b3..1f60964fb3a 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -888,7 +888,7 @@ void TransactionParticipant::_abortTransactionOnSession(WithLock wl) { &Top::get(getGlobalServiceContext())); } _logSlowTransaction(wl, - &(_txnResourceStash->locker()->getLockerInfo())->stats, + &(_txnResourceStash->locker()->getLockerInfo(boost::none))->stats, TransactionState::kAborted, _txnResourceStash->getReadConcernArgs()); _txnResourceStash = boost::none; @@ -912,10 +912,11 @@ void TransactionParticipant::_abortTransactionOnSession(WithLock wl) { void TransactionParticipant::_cleanUpTxnResourceOnOpCtx( WithLock wl, OperationContext* opCtx, TransactionState::StateFlag terminationCause) { // Log the transaction if its duration is longer than the slowMS command threshold. - _logSlowTransaction(wl, - &(opCtx->lockState()->getLockerInfo())->stats, - terminationCause, - repl::ReadConcernArgs::get(opCtx)); + _logSlowTransaction( + wl, + &(opCtx->lockState()->getLockerInfo(CurOp::get(*opCtx)->getLockStatsBase()))->stats, + terminationCause, + repl::ReadConcernArgs::get(opCtx)); // Reset the WUOW. We should be able to abort empty transactions that don't have WUOW. if (opCtx->getWriteUnitOfWork()) { @@ -1015,7 +1016,7 @@ void TransactionParticipant::reportStashedState(BSONObjBuilder* builder) const { stdx::lock_guard lm(_mutex); if (_txnResourceStash && _txnResourceStash->locker()) { - if (auto lockerInfo = _txnResourceStash->locker()->getLockerInfo()) { + if (auto lockerInfo = _txnResourceStash->locker()->getLockerInfo(boost::none)) { invariant(_activeTxnNumber != kUninitializedTxnNumber); builder->append("host", getHostNameCachedAndPort()); builder->append("desc", "inactive transaction"); diff --git a/src/mongo/db/transaction_participant_test.cpp b/src/mongo/db/transaction_participant_test.cpp index 19323de93d4..7680c056a47 100644 --- a/src/mongo/db/transaction_participant_test.cpp +++ b/src/mongo/db/transaction_participant_test.cpp @@ -2592,7 +2592,8 @@ std::string buildTransactionInfoString(OperationContext* opCtx, const TxnNumber txnNum, const int metricValue) { // Calling transactionInfoForLog to get the actual transaction info string. - const auto lockerInfo = opCtx->lockState()->getLockerInfo(); + const auto lockerInfo = + opCtx->lockState()->getLockerInfo(CurOp::get(*opCtx)->getLockStatsBase()); // Building expected transaction info string. StringBuilder parametersInfo; buildParametersInfoString( @@ -2658,7 +2659,7 @@ TEST_F(TransactionsMetricsTest, TestTransactionInfoForLogAfterCommit) { txnParticipant->unstashTransactionResources(opCtx(), "commitTransaction"); txnParticipant->commitUnpreparedTransaction(opCtx()); - const auto lockerInfo = opCtx()->lockState()->getLockerInfo(); + const auto lockerInfo = opCtx()->lockState()->getLockerInfo(boost::none); ASSERT(lockerInfo); std::string testTransactionInfo = txnParticipant->transactionInfoForLogForTest(&lockerInfo->stats, true, readConcernArgs); @@ -2694,7 +2695,7 @@ TEST_F(TransactionsMetricsTest, TestTransactionInfoForLogAfterAbort) { txnParticipant->unstashTransactionResources(opCtx(), "abortTransaction"); txnParticipant->abortActiveTransaction(opCtx()); - const auto lockerInfo = opCtx()->lockState()->getLockerInfo(); + const auto lockerInfo = opCtx()->lockState()->getLockerInfo(boost::none); ASSERT(lockerInfo); std::string testTransactionInfo = @@ -2724,7 +2725,7 @@ DEATH_TEST_F(TransactionsMetricsTest, TestTransactionInfoForLogWithNoLockerInfoS auto txnParticipant = TransactionParticipant::get(opCtx()); - const auto lockerInfo = opCtx()->lockState()->getLockerInfo(); + const auto lockerInfo = opCtx()->lockState()->getLockerInfo(boost::none); ASSERT(lockerInfo); txnParticipant->unstashTransactionResources(opCtx(), "commitTransaction"); @@ -2759,7 +2760,7 @@ TEST_F(TransactionsMetricsTest, LogTransactionInfoAfterSlowCommit) { txnParticipant->commitUnpreparedTransaction(opCtx()); stopCapturingLogMessages(); - const auto lockerInfo = opCtx()->lockState()->getLockerInfo(); + const auto lockerInfo = opCtx()->lockState()->getLockerInfo(boost::none); ASSERT(lockerInfo); std::string expectedTransactionInfo = "transaction " + txnParticipant->transactionInfoForLogForTest(&lockerInfo->stats, true, readConcernArgs); @@ -2792,7 +2793,7 @@ TEST_F(TransactionsMetricsTest, LogTransactionInfoAfterSlowAbort) { txnParticipant->abortActiveTransaction(opCtx()); stopCapturingLogMessages(); - const auto lockerInfo = opCtx()->lockState()->getLockerInfo(); + const auto lockerInfo = opCtx()->lockState()->getLockerInfo(boost::none); ASSERT(lockerInfo); std::string expectedTransactionInfo = "transaction " + txnParticipant->transactionInfoForLogForTest(&lockerInfo->stats, false, readConcernArgs); @@ -2823,7 +2824,7 @@ TEST_F(TransactionsMetricsTest, LogTransactionInfoAfterSlowStashedAbort) { txnParticipant->stashTransactionResources(opCtx()); const auto txnResourceStashLocker = txnParticipant->getTxnResourceStashLockerForTest(); ASSERT(txnResourceStashLocker); - const auto lockerInfo = txnResourceStashLocker->getLockerInfo(); + const auto lockerInfo = txnResourceStashLocker->getLockerInfo(boost::none); serverGlobalParams.slowMS = 10; sleepmillis(serverGlobalParams.slowMS + 1); -- cgit v1.2.1