diff options
author | Maria van Keulen <maria.vankeulen@mongodb.com> | 2019-08-21 17:06:24 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-08-21 17:06:24 +0000 |
commit | 0dbb48618e221d378fa25e9a718ea1ee7fb74c4f (patch) | |
tree | 161e8d9e7d3d6523792384617269f1f5196cf2b7 /src/mongo | |
parent | 49eec6c9e9f728ec0217edba3664cce031042338 (diff) | |
download | mongo-0dbb48618e221d378fa25e9a718ea1ee7fb74c4f.tar.gz |
SERVER-42917 Ignore nonsensical wall clock readings in lag calculation
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/storage/flow_control.cpp | 28 |
1 file changed, 17 insertions, 11 deletions
```diff
diff --git a/src/mongo/db/storage/flow_control.cpp b/src/mongo/db/storage/flow_control.cpp
index 71e0eff8b96..e811f87d4f5 100644
--- a/src/mongo/db/storage/flow_control.cpp
+++ b/src/mongo/db/storage/flow_control.cpp
@@ -317,15 +317,21 @@ int FlowControl::getNumTickets() {
 
     int ret = 0;
     const auto thresholdLagMillis = getThresholdLagMillis();
-    const bool isHealthy =
-        getLagMillis(myLastApplied.wallTime, lastCommitted.wallTime) < thresholdLagMillis ||
-        // _approximateOpsBetween will return -1 if the input timestamps are in the same "bucket".
-        // This is an indication that there are very few ops between the two timestamps.
-        //
-        // Don't let the no-op writer on idle systems fool the sophisticated "is the replica set
-        // lagged" classifier.
-        _approximateOpsBetween(lastCommitted.opTime.getTimestamp(),
-                               myLastApplied.opTime.getTimestamp()) == -1;
+
+    // Successive lastCommitted and lastApplied wall clock time recordings are not guaranteed to be
+    // monotonically increasing. Recordings that satisfy the following check result in a negative
+    // value for lag, so ignore them.
+    const bool ignoreWallTimes = lastCommitted.wallTime > myLastApplied.wallTime;
+
+    // _approximateOpsBetween will return -1 if the input timestamps are in the same "bucket".
+    // This is an indication that there are very few ops between the two timestamps.
+    //
+    // Don't let the no-op writer on idle systems fool the sophisticated "is the replica set
+    // lagged" classifier.
+    const bool isHealthy = !ignoreWallTimes &&
+        (getLagMillis(myLastApplied.wallTime, lastCommitted.wallTime) < thresholdLagMillis ||
+         _approximateOpsBetween(lastCommitted.opTime.getTimestamp(),
+                                myLastApplied.opTime.getTimestamp()) == -1);
 
     if (isHealthy) {
         // The add/multiply technique is used to ensure ticket allocation can ramp up quickly,
@@ -340,7 +346,7 @@ int FlowControl::getNumTickets() {
             auto waitTime = curTimeMicros64() - _startWaitTime;
             _isLaggedTimeMicros.fetchAndAddRelaxed(waitTime);
         }
-    } else if (sustainerAdvanced(_prevMemberData, _currMemberData)) {
+    } else if (!ignoreWallTimes && sustainerAdvanced(_prevMemberData, _currMemberData)) {
         // Expected case where flow control has meaningful data from the last period to make a new
         // calculation.
         ret =
@@ -357,7 +363,7 @@ int FlowControl::getNumTickets() {
         }
     } else {
         // Unexpected case where consecutive readings from the topology state don't meet some basic
-        // expectations.
+        // expectations, or where the lag measure is nonsensical.
         ret = _lastTargetTicketsPermitted.load();
         _lastTimeSustainerAdvanced = Date_t::now();
         // Since this case does not give conclusive evidence that isLagged could have meaningfully
```