diff options
author | David Storch <david.storch@10gen.com> | 2017-06-05 19:33:37 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2017-06-14 18:52:38 -0400 |
commit | a68f209e4ff495e805e4d1b12d6fc3381c56a771 (patch) | |
tree | f3828ef00f300c6d2d5965fe0d9f9ebb27180572 /src/mongo/db/query/find.cpp | |
parent | 2b2ea96ea4a926018dc02bfc1c33ebcfed8bc2a0 (diff) | |
download | mongo-a68f209e4ff495e805e4d1b12d6fc3381c56a771.tar.gz |
SERVER-29304 Exclude time spent blocking for awaitData from getMore latency reporting.
This change applies to various server diagnostics which
include latency metrics: the slow query logs, the
system.profile collection, top, global operation latency
stats, and per-collection operation latency stats. The
secs_running and microsecs_running fields reported in
currentOp output, on the other hand, will include time spent
blocking for awaitData.
Furthermore, this change fixes the following:
- getMore operations are no longer double-counted.
- Lock acquisition times are now consistently included.
This fixes a bug in which time spent acquiring locks was
accidentally omitted in some cases.
Diffstat (limited to 'src/mongo/db/query/find.cpp')
-rw-r--r-- | src/mongo/db/query/find.cpp | 39 |
1 files changed, 25 insertions, 14 deletions
diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp index e03b3318b1a..56bb92cfb77 100644 --- a/src/mongo/db/query/find.cpp +++ b/src/mongo/db/query/find.cpp @@ -235,6 +235,7 @@ Message getMore(OperationContext* opCtx, invariant(ntoreturn >= 0); CurOp& curOp = *CurOp::get(opCtx); + curOp.ensureStarted(); // For testing, we may want to fail if we receive a getmore. if (MONGO_FAIL_POINT(failReceivedGetmore)) { @@ -247,18 +248,22 @@ Message getMore(OperationContext* opCtx, // Cursors come in one of two flavors: // - Cursors owned by the collection cursor manager, such as those generated via the find - // command. For these cursors, we hold the appropriate collection lock for the duration of - // the getMore using AutoGetCollectionForRead. This will automatically update the CurOp - // object appropriately and record execution time via Top upon completion. + // command. For these cursors, we hold the appropriate collection lock for the duration of the + // getMore using AutoGetCollectionForRead. // - Cursors owned by the global cursor manager, such as those generated via the aggregate // command. These cursors either hold no collection state or manage their collection state - // internally, so we acquire no locks. In this case we use the AutoStatsTracker object to - // update the CurOp object appropriately and record execution time via Top upon - // completion. + // internally, so we acquire no locks. // - // Thus, only one of 'readLock' and 'statsTracker' will be populated as we populate - // 'cursorManager'. - boost::optional<AutoGetCollectionForReadCommand> readLock; + // While we only need to acquire locks in the case of a cursor which is *not* globally owned, we + // need to create an AutoStatsTracker in either case. This is responsible for updating + // statistics in CurOp and Top. We avoid using AutoGetCollectionForReadCommand because we may + // need to drop and reacquire locks when the cursor is awaitData, but we don't want to update + // the stats twice. + // + // Note that we acquire our locks before our ClientCursorPin, in order to ensure that the pin's + // destructor is called before the lock's destructor (if there is one) so that the cursor + // cleanup can occur under the lock. + boost::optional<AutoGetCollectionForRead> readLock; boost::optional<AutoStatsTracker> statsTracker; CursorManager* cursorManager; @@ -283,6 +288,12 @@ Message getMore(OperationContext* opCtx, } } else { readLock.emplace(opCtx, nss); + const int doNotChangeProfilingLevel = 0; + statsTracker.emplace(opCtx, + nss, + Top::LockType::ReadLocked, + readLock->getDb() ? readLock->getDb()->getProfilingLevel() + : doNotChangeProfilingLevel); Collection* collection = readLock->getCollection(); uassert( ErrorCodes::OperationFailed, "collection dropped between getMore calls", collection); @@ -416,14 +427,13 @@ Message getMore(OperationContext* opCtx, exec->saveState(); readLock.reset(); - // Block waiting for data for up to 1 second. + // Block waiting for data for up to 1 second. Time spent blocking is not counted towards + // the total operation latency. + curOp.pauseTimer(); Seconds timeout(1); notifier->wait(notifierVersion, timeout); notifier.reset(); - - // Set expected latency to match wait time. This makes sure the logs aren't spammed - // by awaitData queries that exceed slowms due to blocking on the CappedInsertNotifier. - curOp.setExpectedLatencyMs(durationCount<Milliseconds>(timeout)); + curOp.resumeTimer(); // Reacquiring locks. readLock.emplace(opCtx, nss); @@ -507,6 +517,7 @@ std::string runQuery(OperationContext* opCtx, const NamespaceString& nss, Message& result) { CurOp& curOp = *CurOp::get(opCtx); + curOp.ensureStarted(); uassert(ErrorCodes::InvalidNamespace, str::stream() << "Invalid ns [" << nss.ns() << "]", |