diff options
author | David Storch <david.storch@10gen.com> | 2017-06-05 19:33:37 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2017-06-14 18:52:38 -0400 |
commit | a68f209e4ff495e805e4d1b12d6fc3381c56a771 (patch) | |
tree | f3828ef00f300c6d2d5965fe0d9f9ebb27180572 /src/mongo/db/curop.h | |
parent | 2b2ea96ea4a926018dc02bfc1c33ebcfed8bc2a0 (diff) | |
download | mongo-a68f209e4ff495e805e4d1b12d6fc3381c56a771.tar.gz |
SERVER-29304 Exclude time spent blocking for awaitData from getMore latency reporting.
This change applies to various server diagnostics which
include latency metrics: the slow query logs, the
system.profile collection, top, global operation latency
stats, and per-collection operation latency stats. The
secs_running and microsecs_running fields reported in
currentOp output, on the other hand, will include time spent
blocking for awaitData.
Furthermore, this change fixes the following:
- getMore operations are no longer double-counted.
- Lock acquisition times are now consistently included.
This fixes a bug in which time spent acquiring locks was
accidentally omitted in some cases.
Diffstat (limited to 'src/mongo/db/curop.h')
-rw-r--r-- | src/mongo/db/curop.h | 94 |
1 files changed, 69 insertions, 25 deletions
diff --git a/src/mongo/db/curop.h b/src/mongo/db/curop.h index c14af5e731f..bb1f82b1a5d 100644 --- a/src/mongo/db/curop.h +++ b/src/mongo/db/curop.h @@ -237,8 +237,7 @@ public: if (!shouldSample || _dbprofile <= 0) return false; - long long opMicros = isDone() ? totalTimeMicros() : elapsedMicros(); - return opMicros >= serverGlobalParams.slowMS * 1000LL; + return elapsedTimeExcludingPauses() >= Milliseconds{serverGlobalParams.slowMS}; } /** @@ -292,17 +291,67 @@ public: return _end > 0; } - long long totalTimeMicros() { - massert(12601, "CurOp not marked done yet", _end); - return _end - startTime(); + /** + * Stops the operation latency timer from "ticking". Time spent paused is not included in the + * latencies returned by elapsedTimeExcludingPauses(). + * + * Illegal to call if either the CurOp has not been started, or the CurOp is already in a paused + * state. + */ + void pauseTimer() { + invariant(isStarted()); + invariant(_lastPauseTime == 0); + _lastPauseTime = curTimeMicros64(); } - long long elapsedMicros() { - return curTimeMicros64() - startTime(); + /** + * Starts the operation latency timer "ticking" again. Illegal to call if the CurOp has not been + * started and then subsequently paused. + */ + void resumeTimer() { + invariant(isStarted()); + invariant(_lastPauseTime > 0); + _totalPausedDuration += + Microseconds{static_cast<long long>(curTimeMicros64()) - _lastPauseTime}; + _lastPauseTime = 0; } - int elapsedSeconds() { - return static_cast<int>(elapsedMicros() / (1000 * 1000)); + /** + * If this op has been marked as done(), returns the wall clock duration between being marked as + * started with ensureStarted() and the call to done(). + * + * Otherwise, returns the wall clock duration between the start time and now. + * + * If this op has not yet been started, returns 0. + */ + Microseconds elapsedTimeTotal() { + if (!isStarted()) { + return Microseconds{0}; + } + + if (!_end) { + return Microseconds{static_cast<long long>(curTimeMicros64() - startTime())}; + } else { + return Microseconds{static_cast<long long>(_end - startTime())}; + } + } + + /** + * Returns the total elapsed duration minus any time spent in a paused state. See + * elapsedTimeTotal() for the definition of the total duration and pause/resumeTimer() for + * details on pausing. + * + * If this op has not yet been started, returns 0. + * + * Illegal to call while the timer is paused. + */ + Microseconds elapsedTimeExcludingPauses() { + invariant(!_lastPauseTime); + if (!isStarted()) { + return Microseconds{0}; + } + + return elapsedTimeTotal() - _totalPausedDuration; } /** @@ -377,17 +426,6 @@ public: } /** - * Access to _expectedLatencyMs is not synchronized, so it is illegal for threads other than the - * one executing the operation to call getExpectedLatencyMs() and setExpectedLatencyMs(). - */ - long long getExpectedLatencyMs() const { - return _expectedLatencyMs; - } - void setExpectedLatencyMs(long long latency) { - _expectedLatencyMs = latency; - } - - /** * this should be used very sparingly * generally the Context should set this up * but sometimes you want to do it ahead of time @@ -416,9 +454,20 @@ private: CurOpStack* _stack; CurOp* _parent{nullptr}; Command* _command{nullptr}; + + // The time at which this CurOp instance was marked as started. long long _start{0}; + + // The time at which this CurOp instance was marked as done. long long _end{0}; + // The time at which this CurOp instance had its timer paused, or 0 if the timer is not + // currently paused. + long long _lastPauseTime{0}; + + // The cumulative duration for which the timer has been paused. + Microseconds _totalPausedDuration{0}; + // _networkOp represents the network-level op code: OP_QUERY, OP_GET_MORE, OP_COMMAND, etc. NetworkOp _networkOp{opInvalid}; // only set this through setNetworkOp_inlock() to keep synced // _logicalOp is the logical operation type, ie 'dbQuery' regardless of whether this is an @@ -436,11 +485,6 @@ private: ProgressMeter _progressMeter; int _numYields{0}; - // this is how much "extra" time a query might take - // a writebacklisten for example will block for 30s - // so this should be 30000 in that case - long long _expectedLatencyMs{0}; - std::string _planSummary; }; |