diff options
author | Maria van Keulen <maria@mongodb.com> | 2019-06-04 15:02:35 -0400 |
---|---|---|
committer | Maria van Keulen <maria@mongodb.com> | 2019-06-06 13:19:17 -0400 |
commit | 70de3cf1c8394284cbb89d42df7d1f8672b8b412 (patch) | |
tree | eccdd6c39cc6f05af919d38e383a6da3ec9bfe14 | |
parent | b9677eb4b98fba42c92e97a70eb58b812db1ce7a (diff) | |
download | mongo-70de3cf1c8394284cbb89d42df7d1f8672b8b412.tar.gz |
SERVER-41287 Measure count and duration of periods when isLagged is true
(cherry picked from commit c27cafcd34190924cd62267fa8b42baf3caa69cb)
-rw-r--r-- | src/mongo/db/storage/flow_control.cpp | 38 | ||||
-rw-r--r-- | src/mongo/db/storage/flow_control.h | 7 |
2 files changed, 28 insertions, 17 deletions
diff --git a/src/mongo/db/storage/flow_control.cpp b/src/mongo/db/storage/flow_control.cpp index af180435812..e4ba6ad4b03 100644 --- a/src/mongo/db/storage/flow_control.cpp +++ b/src/mongo/db/storage/flow_control.cpp @@ -190,18 +190,6 @@ double FlowControl::_getLocksPerOp() { BSONObj FlowControl::generateSection(OperationContext* opCtx, const BSONElement& configElement) const { - // Flow Control does not have use for lag measured on nodes that cannot accept writes. - const bool canAcceptWrites = _replCoord->canAcceptNonLocalWrites(); - - // Flow Control is only enabled if FCV is 4.2. - const bool isFCV42 = - (serverGlobalParams.featureCompatibility.isVersionInitialized() && - serverGlobalParams.featureCompatibility.getVersion() == - ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42); - - const Date_t myLastAppliedWall = _replCoord->getMyLastAppliedOpTimeAndWallTime().wallTime; - const Date_t lastCommittedWall = _replCoord->getLastCommittedOpTimeAndWallTime().wallTime; - BSONObjBuilder bob; // Most of these values are only computed and meaningful when flow control is enabled. bob.append("enabled", gFlowControlEnabled.load()); @@ -210,9 +198,9 @@ BSONObj FlowControl::generateSection(OperationContext* opCtx, FlowControlTicketholder::get(opCtx)->totalTimeAcquiringMicros()); bob.append("locksPerOp", _lastLocksPerOp.load()); bob.append("sustainerRate", _lastSustainerAppliedCount.load()); - bob.append("isLagged", - isFCV42 && canAcceptWrites && - getLagMillis(myLastAppliedWall, lastCommittedWall) >= getThresholdLagMillis()); + bob.append("isLagged", _isLagged.load()); + bob.append("isLaggedCount", _isLaggedCount.load()); + bob.append("isLaggedTimeMicros", _isLaggedTimeMicros.load()); return bob.obj(); } @@ -353,6 +341,11 @@ int FlowControl::getNumTickets() { gFlowControlTicketMultiplierConstant.load(), _kMaxTickets); _lastTimeSustainerAdvanced = Date_t::now(); + if (_isLagged.load()) { + _isLagged.store(false); + auto waitTime = curTimeMicros64() - _startWaitTime; + _isLaggedTimeMicros.fetchAndAddRelaxed(waitTime); + } } else if (sustainerAdvanced(_prevMemberData, _currMemberData)) { // Expected case where flow control has meaningful data from the last period to make a new // calculation. @@ -363,16 +356,25 @@ int FlowControl::getNumTickets() { locksPerOp, getLagMillis(myLastApplied.wallTime, lastCommitted.wallTime), thresholdLagMillis); + if (!_isLagged.load()) { + _isLagged.store(true); + _isLaggedCount.fetchAndAddRelaxed(1); + _startWaitTime = curTimeMicros64(); + } } else { // Unexpected case where consecutive readings from the topology state don't meet some basic // expectations. ret = _lastTargetTicketsPermitted.load(); _lastTimeSustainerAdvanced = Date_t::now(); + // Since this case does not give conclusive evidence that isLagged could have meaningfully + // transitioned from true to false, it does not make sense to update the _isLagged* + // variables here. } ret = std::max(ret, gFlowControlMinTicketsPerSecond.load()); - LOG(DEBUG_LOG_LEVEL) << "Are lagged? " << !isHealthy << " Curr lag millis: " + LOG(DEBUG_LOG_LEVEL) << "Are lagged? " << (_isLagged.load() ? "true" : "false") + << " Curr lag millis: " << getLagMillis(myLastApplied.wallTime, lastCommitted.wallTime) << " OpsLagged: " << _approximateOpsBetween(lastCommitted.opTime.getTimestamp(), @@ -381,7 +383,9 @@ int FlowControl::getNumTickets() { << " Last granted: " << _lastTargetTicketsPermitted.load() << " Last sustainer applied: " << _lastSustainerAppliedCount.load() << " Acquisitions since last check: " << locksUsedLastPeriod - << " Locks per op: " << _lastLocksPerOp.load(); + << " Locks per op: " << _lastLocksPerOp.load() + << " Count of lagged periods: " << _isLaggedCount.load() + << " Total duration of lagged periods: " << _isLaggedTimeMicros.load(); _lastTargetTicketsPermitted.store(ret); diff --git a/src/mongo/db/storage/flow_control.h b/src/mongo/db/storage/flow_control.h index 8e6014d29b8..e3905a3728b 100644 --- a/src/mongo/db/storage/flow_control.h +++ b/src/mongo/db/storage/flow_control.h @@ -120,6 +120,10 @@ private: AtomicWord<int> _lastTargetTicketsPermitted{_kMaxTickets}; AtomicWord<double> _lastLocksPerOp{0.0}; AtomicWord<int> _lastSustainerAppliedCount{0}; + AtomicWord<bool> _isLagged{false}; + AtomicWord<int> _isLaggedCount{0}; + // Use an int64_t as this is serialized to bson which does not support unsigned 64-bit numbers. + AtomicWord<std::int64_t> _isLaggedTimeMicros{0}; mutable stdx::mutex _sampledOpsMutex; std::deque<Sample> _sampledOpsApplied; @@ -134,6 +138,9 @@ private: std::vector<repl::MemberData> _prevMemberData; Date_t _lastTimeSustainerAdvanced; + + // This value is used for calculating server status metrics. + std::uint64_t _startWaitTime = 0; }; } // namespace mongo |