diff options
author | David Storch <david.storch@10gen.com> | 2017-06-05 19:33:37 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2017-06-14 18:52:38 -0400 |
commit | a68f209e4ff495e805e4d1b12d6fc3381c56a771 (patch) | |
tree | f3828ef00f300c6d2d5965fe0d9f9ebb27180572 /src | |
parent | 2b2ea96ea4a926018dc02bfc1c33ebcfed8bc2a0 (diff) | |
download | mongo-a68f209e4ff495e805e4d1b12d6fc3381c56a771.tar.gz |
SERVER-29304 Exclude time spent blocking for awaitData from getMore latency reporting.
This change applies to various server diagnostics which
include latency metrics: the slow query logs, the
system.profile collection, top, global operation latency
stats, and per-collection operation latency stats. The
secs_running and microsecs_running fields reported in
currentOp output, on the other hand, will include time spent
blocking for awaitData.
Furthermore, this change fixes the following:
- getMore operations are no longer double-counted.
- Lock acquisition times are now consistently included.
This fixes a bug in which time spent acquiring locks was
accidentally omitted in some cases.
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/assemble_response.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/commands/find_and_modify.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/commands/geo_near_cmd.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/commands/getmore_cmd.cpp | 35 | ||||
-rw-r--r-- | src/mongo/db/curop.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/curop.h | 94 | ||||
-rw-r--r-- | src/mongo/db/db_raii.cpp | 13 | ||||
-rw-r--r-- | src/mongo/db/db_raii.h | 1 | ||||
-rw-r--r-- | src/mongo/db/ops/write_ops_exec.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/query/explain.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/query/find.cpp | 39 |
11 files changed, 142 insertions, 69 deletions
diff --git a/src/mongo/db/assemble_response.cpp b/src/mongo/db/assemble_response.cpp index 06ac6c090c6..2ecfa4093c3 100644 --- a/src/mongo/db/assemble_response.cpp +++ b/src/mongo/db/assemble_response.cpp @@ -493,12 +493,13 @@ mongo::DbResponse mongo::assembleResponse(OperationContext* opCtx, const Message } currentOp.ensureStarted(); currentOp.done(); - debug.executionTimeMicros = currentOp.totalTimeMicros(); + debug.executionTimeMicros = durationCount<Microseconds>(currentOp.elapsedTimeExcludingPauses()); - logThresholdMs += currentOp.getExpectedLatencyMs(); Top::get(opCtx->getServiceContext()) .incrementGlobalLatencyStats( - opCtx, currentOp.totalTimeMicros(), currentOp.getReadWriteType()); + opCtx, + durationCount<Microseconds>(currentOp.elapsedTimeExcludingPauses()), + currentOp.getReadWriteType()); const bool shouldSample = serverGlobalParams.sampleRate == 1.0 ? true diff --git a/src/mongo/db/commands/find_and_modify.cpp b/src/mongo/db/commands/find_and_modify.cpp index 01d5d70b584..202bbdbf8f6 100644 --- a/src/mongo/db/commands/find_and_modify.cpp +++ b/src/mongo/db/commands/find_and_modify.cpp @@ -209,7 +209,7 @@ void recordStatsForTopCommand(OperationContext* opCtx) { curOp->getNS(), curOp->getLogicalOp(), Top::LockType::WriteLocked, - curOp->elapsedMicros(), + durationCount<Microseconds>(curOp->elapsedTimeExcludingPauses()), curOp->isCommand(), curOp->getReadWriteType()); } diff --git a/src/mongo/db/commands/geo_near_cmd.cpp b/src/mongo/db/commands/geo_near_cmd.cpp index 3b56d5074e3..8f693e74680 100644 --- a/src/mongo/db/commands/geo_near_cmd.cpp +++ b/src/mongo/db/commands/geo_near_cmd.cpp @@ -320,7 +320,8 @@ public: stats.append("avgDistance", totalDistance / results); } stats.append("maxDistance", farthestDist); - stats.appendIntOrLL("time", curOp->elapsedMicros() / 1000); + stats.appendIntOrLL("time", + durationCount<Microseconds>(curOp->elapsedTimeExcludingPauses())); stats.done(); collection->infoCache()->notifyOfQuery(opCtx, summary.indexesUsed); diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index 6cc3799d6a4..615caf264e5 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -168,21 +168,21 @@ public: // Cursors come in one of two flavors: // - Cursors owned by the collection cursor manager, such as those generated via the find // command. For these cursors, we hold the appropriate collection lock for the duration of - // the getMore using AutoGetCollectionForRead. This will automatically update the CurOp - // object appropriately and record execution time via Top upon completion. + // the getMore using AutoGetCollectionForRead. // - Cursors owned by the global cursor manager, such as those generated via the aggregate // command. These cursors either hold no collection state or manage their collection state - // internally, so we acquire no locks. In this case we use the AutoStatsTracker object to - // update the CurOp object appropriately and record execution time via Top upon - // completion. + // internally, so we acquire no locks. // - // Thus, only one of 'readLock' and 'statsTracker' will be populated as we populate - // 'cursorManager'. + // While we only need to acquire locks in the case of a cursor which is *not* globally + // owned, we need to create an AutoStatsTracker in either case. This is responsible for + // updating statistics in CurOp and Top. We avoid using AutoGetCollectionForReadCommand + // because we may need to drop and reacquire locks when the cursor is awaitData, but we + // don't want to update the stats twice. // - // Note that we declare our locks before our ClientCursorPin, in order to ensure that + // Note that we acquire our locks before our ClientCursorPin, in order to ensure that // the pin's destructor is called before the lock's destructor (if there is one) so that the // cursor cleanup can occur under the lock. - boost::optional<AutoGetCollectionForReadCommand> readLock; + boost::optional<AutoGetCollectionForRead> readLock; boost::optional<AutoStatsTracker> statsTracker; CursorManager* cursorManager; @@ -204,6 +204,13 @@ public: ChunkVersion::IGNORED()); readLock.emplace(opCtx, request.nss); + const int doNotChangeProfilingLevel = 0; + statsTracker.emplace(opCtx, + request.nss, + Top::LockType::ReadLocked, + readLock->getDb() ? readLock->getDb()->getProfilingLevel() + : doNotChangeProfilingLevel); + Collection* collection = readLock->getCollection(); if (!collection) { return appendCommandStatus(result, @@ -368,15 +375,13 @@ public: exec->saveState(); readLock.reset(); - // Block waiting for data. + // Block waiting for data. Time spent blocking is not counted towards the total + // operation latency. + curOp->pauseTimer(); const auto timeout = opCtx->getRemainingMaxTimeMicros(); notifier->wait(notifierVersion, timeout); notifier.reset(); - - // Set expected latency to match wait time. This makes sure the logs aren't spammed - // by awaitData queries that exceed slowms due to blocking on the - // CappedInsertNotifier. - curOp->setExpectedLatencyMs(durationCount<Milliseconds>(timeout)); + curOp->resumeTimer(); readLock.emplace(opCtx, request.nss); exec->restoreState(); diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp index 1894aaa5fef..451fa96132b 100644 --- a/src/mongo/db/curop.cpp +++ b/src/mongo/db/curop.cpp @@ -349,8 +349,8 @@ void appendAsObjOrString(StringData name, void CurOp::reportState(BSONObjBuilder* builder) { if (_start) { - builder->append("secs_running", elapsedSeconds()); - builder->append("microsecs_running", static_cast<long long int>(elapsedMicros())); + builder->append("secs_running", durationCount<Seconds>(elapsedTimeTotal())); + builder->append("microsecs_running", durationCount<Microseconds>(elapsedTimeTotal())); } builder->append("op", logicalOpToString(_logicalOp)); diff --git a/src/mongo/db/curop.h b/src/mongo/db/curop.h index c14af5e731f..bb1f82b1a5d 100644 --- a/src/mongo/db/curop.h +++ b/src/mongo/db/curop.h @@ -237,8 +237,7 @@ public: if (!shouldSample || _dbprofile <= 0) return false; - long long opMicros = isDone() ? totalTimeMicros() : elapsedMicros(); - return opMicros >= serverGlobalParams.slowMS * 1000LL; + return elapsedTimeExcludingPauses() >= Milliseconds{serverGlobalParams.slowMS}; } /** @@ -292,17 +291,67 @@ public: return _end > 0; } - long long totalTimeMicros() { - massert(12601, "CurOp not marked done yet", _end); - return _end - startTime(); + /** + * Stops the operation latency timer from "ticking". Time spent paused is not included in the + * latencies returned by elapsedTimeExcludingPauses(). + * + * Illegal to call if either the CurOp has not been started, or the CurOp is already in a paused + * state. + */ + void pauseTimer() { + invariant(isStarted()); + invariant(_lastPauseTime == 0); + _lastPauseTime = curTimeMicros64(); } - long long elapsedMicros() { - return curTimeMicros64() - startTime(); + /** + * Starts the operation latency timer "ticking" again. Illegal to call if the CurOp has not been + * started and then subsequently paused. + */ + void resumeTimer() { + invariant(isStarted()); + invariant(_lastPauseTime > 0); + _totalPausedDuration += + Microseconds{static_cast<long long>(curTimeMicros64()) - _lastPauseTime}; + _lastPauseTime = 0; } - int elapsedSeconds() { - return static_cast<int>(elapsedMicros() / (1000 * 1000)); + /** + * If this op has been marked as done(), returns the wall clock duration between being marked as + * started with ensureStarted() and the call to done(). + * + * Otherwise, returns the wall clock duration between the start time and now. + * + * If this op has not yet been started, returns 0. + */ + Microseconds elapsedTimeTotal() { + if (!isStarted()) { + return Microseconds{0}; + } + + if (!_end) { + return Microseconds{static_cast<long long>(curTimeMicros64() - startTime())}; + } else { + return Microseconds{static_cast<long long>(_end - startTime())}; + } + } + + /** + * Returns the total elapsed duration minus any time spent in a paused state. See + * elapsedTimeTotal() for the definition of the total duration and pause/resumeTimer() for + * details on pausing. + * + * If this op has not yet been started, returns 0. + * + * Illegal to call while the timer is paused. + */ + Microseconds elapsedTimeExcludingPauses() { + invariant(!_lastPauseTime); + if (!isStarted()) { + return Microseconds{0}; + } + + return elapsedTimeTotal() - _totalPausedDuration; } /** @@ -377,17 +426,6 @@ public: } /** - * Access to _expectedLatencyMs is not synchronized, so it is illegal for threads other than the - * one executing the operation to call getExpectedLatencyMs() and setExpectedLatencyMs(). - */ - long long getExpectedLatencyMs() const { - return _expectedLatencyMs; - } - void setExpectedLatencyMs(long long latency) { - _expectedLatencyMs = latency; - } - - /** * this should be used very sparingly * generally the Context should set this up * but sometimes you want to do it ahead of time @@ -416,9 +454,20 @@ private: CurOpStack* _stack; CurOp* _parent{nullptr}; Command* _command{nullptr}; + + // The time at which this CurOp instance was marked as started. long long _start{0}; + + // The time at which this CurOp instance was marked as done. long long _end{0}; + // The time at which this CurOp instance had its timer paused, or 0 if the timer is not + // currently paused. + long long _lastPauseTime{0}; + + // The cumulative duration for which the timer has been paused. + Microseconds _totalPausedDuration{0}; + // _networkOp represents the network-level op code: OP_QUERY, OP_GET_MORE, OP_COMMAND, etc. NetworkOp _networkOp{opInvalid}; // only set this through setNetworkOp_inlock() to keep synced // _logicalOp is the logical operation type, ie 'dbQuery' regardless of whether this is an @@ -436,11 +485,6 @@ private: ProgressMeter _progressMeter; int _numYields{0}; - // this is how much "extra" time a query might take - // a writebacklisten for example will block for 30s - // so this should be 30000 in that case - long long _expectedLatencyMs{0}; - std::string _planSummary; }; diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp index b9863e956bf..bff855fcc5b 100644 --- a/src/mongo/db/db_raii.cpp +++ b/src/mongo/db/db_raii.cpp @@ -38,9 +38,14 @@ #include "mongo/db/repl/replication_coordinator_global.h" #include "mongo/db/s/collection_sharding_state.h" #include "mongo/db/stats/top.h" +#include "mongo/util/fail_point_service.h" namespace mongo { +namespace { +MONGO_FP_DECLARE(setAutoGetCollectionWait); +} // namespace + AutoGetDb::AutoGetDb(OperationContext* opCtx, StringData ns, LockMode mode) : _dbLock(opCtx, ns, mode), _db(dbHolder().get(opCtx, ns)) {} @@ -59,6 +64,12 @@ AutoGetCollection::AutoGetCollection(OperationContext* opCtx, db->getViewCatalog()->lookup(opCtx, nss.ns())) uasserted(ErrorCodes::CommandNotSupportedOnView, str::stream() << "Namespace " << nss.ns() << " is a view, not a collection"); + + // Wait for a configured amount of time after acquiring locks if the failpoint is enabled. + MONGO_FAIL_POINT_BLOCK(setAutoGetCollectionWait, customWait) { + const BSONObj& data = customWait.getData(); + sleepFor(Milliseconds(data["waitForMillis"].numberInt())); + } } AutoGetCollectionOrView::AutoGetCollectionOrView(OperationContext* opCtx, @@ -107,7 +118,7 @@ AutoStatsTracker::~AutoStatsTracker() { curOp->getNS(), curOp->getLogicalOp(), _lockType, - _timer.micros(), + durationCount<Microseconds>(curOp->elapsedTimeExcludingPauses()), curOp->isCommand(), curOp->getReadWriteType()); } diff --git a/src/mongo/db/db_raii.h b/src/mongo/db/db_raii.h index ce9b1b200ff..641b64e7988 100644 --- a/src/mongo/db/db_raii.h +++ b/src/mongo/db/db_raii.h @@ -238,7 +238,6 @@ public: ~AutoStatsTracker(); private: - const Timer _timer; OperationContext* _opCtx; Top::LockType _lockType; }; diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp index 757290ef1cc..f8b20c2d10c 100644 --- a/src/mongo/db/ops/write_ops_exec.cpp +++ b/src/mongo/db/ops/write_ops_exec.cpp @@ -82,7 +82,8 @@ MONGO_FP_DECLARE(failAllRemoves); void finishCurOp(OperationContext* opCtx, CurOp* curOp) { try { curOp->done(); - long long executionTimeMicros = curOp->totalTimeMicros(); + long long executionTimeMicros = + durationCount<Microseconds>(curOp->elapsedTimeExcludingPauses()); curOp->debug().executionTimeMicros = executionTimeMicros; recordCurOpMetrics(opCtx); @@ -91,7 +92,7 @@ void finishCurOp(OperationContext* opCtx, CurOp* curOp) { curOp->getNS(), curOp->getLogicalOp(), Top::LockType::WriteLocked, - curOp->totalTimeMicros(), + durationCount<Microseconds>(curOp->elapsedTimeExcludingPauses()), curOp->isCommand(), curOp->getReadWriteType()); @@ -102,8 +103,7 @@ void finishCurOp(OperationContext* opCtx, CurOp* curOp) { const bool logAll = logger::globalLogDomain()->shouldLog(logger::LogComponent::kCommand, logger::LogSeverity::Debug(1)); - const bool logSlow = executionTimeMicros > - (serverGlobalParams.slowMS + curOp->getExpectedLatencyMs()) * 1000LL; + const bool logSlow = executionTimeMicros > (serverGlobalParams.slowMS * 1000LL); const bool shouldSample = serverGlobalParams.sampleRate == 1.0 ? true @@ -405,7 +405,7 @@ WriteResult performInserts(OperationContext* opCtx, const InsertOp& wholeOp) { wholeOp.ns.ns(), LogicalOp::opInsert, Top::LockType::WriteLocked, - curOp.totalTimeMicros(), + durationCount<Microseconds>(curOp.elapsedTimeExcludingPauses()), curOp.isCommand(), curOp.getReadWriteType()); diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp index b243336655b..b3784552f6d 100644 --- a/src/mongo/db/query/explain.cpp +++ b/src/mongo/db/query/explain.cpp @@ -763,7 +763,8 @@ void Explain::explainStages(PlanExecutor* exec, // Generate exec stats BSON for the winning plan. OperationContext* opCtx = exec->getOpCtx(); - long long totalTimeMillis = CurOp::get(opCtx)->elapsedMicros() / 1000; + long long totalTimeMillis = + durationCount<Milliseconds>(CurOp::get(opCtx)->elapsedTimeTotal()); generateExecStats(winningStats.get(), verbosity, &execBob, totalTimeMillis); // Also generate exec stats for all plans, if the verbosity level is high enough. diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp index e03b3318b1a..56bb92cfb77 100644 --- a/src/mongo/db/query/find.cpp +++ b/src/mongo/db/query/find.cpp @@ -235,6 +235,7 @@ Message getMore(OperationContext* opCtx, invariant(ntoreturn >= 0); CurOp& curOp = *CurOp::get(opCtx); + curOp.ensureStarted(); // For testing, we may want to fail if we receive a getmore. if (MONGO_FAIL_POINT(failReceivedGetmore)) { @@ -247,18 +248,22 @@ Message getMore(OperationContext* opCtx, // Cursors come in one of two flavors: // - Cursors owned by the collection cursor manager, such as those generated via the find - // command. For these cursors, we hold the appropriate collection lock for the duration of - // the getMore using AutoGetCollectionForRead. This will automatically update the CurOp - // object appropriately and record execution time via Top upon completion. + // command. For these cursors, we hold the appropriate collection lock for the duration of the + // getMore using AutoGetCollectionForRead. // - Cursors owned by the global cursor manager, such as those generated via the aggregate // command. These cursors either hold no collection state or manage their collection state - // internally, so we acquire no locks. In this case we use the AutoStatsTracker object to - // update the CurOp object appropriately and record execution time via Top upon - // completion. + // internally, so we acquire no locks. // - // Thus, only one of 'readLock' and 'statsTracker' will be populated as we populate - // 'cursorManager'. - boost::optional<AutoGetCollectionForReadCommand> readLock; + // While we only need to acquire locks in the case of a cursor which is *not* globally owned, we + // need to create an AutoStatsTracker in either case. This is responsible for updating + // statistics in CurOp and Top. We avoid using AutoGetCollectionForReadCommand because we may + // need to drop and reacquire locks when the cursor is awaitData, but we don't want to update + // the stats twice. + // + // Note that we acquire our locks before our ClientCursorPin, in order to ensure that the pin's + // destructor is called before the lock's destructor (if there is one) so that the cursor + // cleanup can occur under the lock. + boost::optional<AutoGetCollectionForRead> readLock; boost::optional<AutoStatsTracker> statsTracker; CursorManager* cursorManager; @@ -283,6 +288,12 @@ Message getMore(OperationContext* opCtx, } } else { readLock.emplace(opCtx, nss); + const int doNotChangeProfilingLevel = 0; + statsTracker.emplace(opCtx, + nss, + Top::LockType::ReadLocked, + readLock->getDb() ? readLock->getDb()->getProfilingLevel() + : doNotChangeProfilingLevel); Collection* collection = readLock->getCollection(); uassert( ErrorCodes::OperationFailed, "collection dropped between getMore calls", collection); @@ -416,14 +427,13 @@ Message getMore(OperationContext* opCtx, exec->saveState(); readLock.reset(); - // Block waiting for data for up to 1 second. + // Block waiting for data for up to 1 second. Time spent blocking is not counted towards + // the total operation latency. + curOp.pauseTimer(); Seconds timeout(1); notifier->wait(notifierVersion, timeout); notifier.reset(); - - // Set expected latency to match wait time. This makes sure the logs aren't spammed - // by awaitData queries that exceed slowms due to blocking on the CappedInsertNotifier. - curOp.setExpectedLatencyMs(durationCount<Milliseconds>(timeout)); + curOp.resumeTimer(); // Reacquiring locks. readLock.emplace(opCtx, nss); @@ -507,6 +517,7 @@ std::string runQuery(OperationContext* opCtx, const NamespaceString& nss, Message& result) { CurOp& curOp = *CurOp::get(opCtx); + curOp.ensureStarted(); uassert(ErrorCodes::InvalidNamespace, str::stream() << "Invalid ns [" << nss.ns() << "]", |