summaryrefslogtreecommitdiff
path: root/src/mongo/db/query/find.cpp
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2017-06-05 19:33:37 -0400
committerDavid Storch <david.storch@10gen.com>2017-06-14 18:52:38 -0400
commita68f209e4ff495e805e4d1b12d6fc3381c56a771 (patch)
treef3828ef00f300c6d2d5965fe0d9f9ebb27180572 /src/mongo/db/query/find.cpp
parent2b2ea96ea4a926018dc02bfc1c33ebcfed8bc2a0 (diff)
downloadmongo-a68f209e4ff495e805e4d1b12d6fc3381c56a771.tar.gz
SERVER-29304 Exclude time spent blocking for awaitData from getMore latency reporting.
This change applies to various server diagnostics which include latency metrics: the slow query logs, the system.profile collection, top, global operation latency stats, and per-collection operation latency stats. The secs_running and microsecs_running fields reported in currentOp output, on the other hand, will include time spent blocking for awaitData. Furthermore, this change fixes the following: - getMore operations are no longer double-counted. - Lock acquisition times are now consistently included. This fixes a bug in which time spent acquiring locks was accidentally omitted in some cases.
Diffstat (limited to 'src/mongo/db/query/find.cpp')
-rw-r--r--src/mongo/db/query/find.cpp39
1 files changed, 25 insertions, 14 deletions
diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp
index e03b3318b1a..56bb92cfb77 100644
--- a/src/mongo/db/query/find.cpp
+++ b/src/mongo/db/query/find.cpp
@@ -235,6 +235,7 @@ Message getMore(OperationContext* opCtx,
invariant(ntoreturn >= 0);
CurOp& curOp = *CurOp::get(opCtx);
+ curOp.ensureStarted();
// For testing, we may want to fail if we receive a getmore.
if (MONGO_FAIL_POINT(failReceivedGetmore)) {
@@ -247,18 +248,22 @@ Message getMore(OperationContext* opCtx,
// Cursors come in one of two flavors:
// - Cursors owned by the collection cursor manager, such as those generated via the find
- // command. For these cursors, we hold the appropriate collection lock for the duration of
- // the getMore using AutoGetCollectionForRead. This will automatically update the CurOp
- // object appropriately and record execution time via Top upon completion.
+ // command. For these cursors, we hold the appropriate collection lock for the duration of the
+ // getMore using AutoGetCollectionForRead.
// - Cursors owned by the global cursor manager, such as those generated via the aggregate
// command. These cursors either hold no collection state or manage their collection state
- // internally, so we acquire no locks. In this case we use the AutoStatsTracker object to
- // update the CurOp object appropriately and record execution time via Top upon
- // completion.
+ // internally, so we acquire no locks.
//
- // Thus, only one of 'readLock' and 'statsTracker' will be populated as we populate
- // 'cursorManager'.
- boost::optional<AutoGetCollectionForReadCommand> readLock;
+ // While we only need to acquire locks in the case of a cursor which is *not* globally owned, we
+ // need to create an AutoStatsTracker in either case. This is responsible for updating
+ // statistics in CurOp and Top. We avoid using AutoGetCollectionForReadCommand because we may
+ // need to drop and reacquire locks when the cursor is awaitData, but we don't want to update
+ // the stats twice.
+ //
+ // Note that we acquire our locks before our ClientCursorPin, in order to ensure that the pin's
+ // destructor is called before the lock's destructor (if there is one) so that the cursor
+ // cleanup can occur under the lock.
+ boost::optional<AutoGetCollectionForRead> readLock;
boost::optional<AutoStatsTracker> statsTracker;
CursorManager* cursorManager;
@@ -283,6 +288,12 @@ Message getMore(OperationContext* opCtx,
}
} else {
readLock.emplace(opCtx, nss);
+ const int doNotChangeProfilingLevel = 0;
+ statsTracker.emplace(opCtx,
+ nss,
+ Top::LockType::ReadLocked,
+ readLock->getDb() ? readLock->getDb()->getProfilingLevel()
+ : doNotChangeProfilingLevel);
Collection* collection = readLock->getCollection();
uassert(
ErrorCodes::OperationFailed, "collection dropped between getMore calls", collection);
@@ -416,14 +427,13 @@ Message getMore(OperationContext* opCtx,
exec->saveState();
readLock.reset();
- // Block waiting for data for up to 1 second.
+ // Block waiting for data for up to 1 second. Time spent blocking is not counted towards
+ // the total operation latency.
+ curOp.pauseTimer();
Seconds timeout(1);
notifier->wait(notifierVersion, timeout);
notifier.reset();
-
- // Set expected latency to match wait time. This makes sure the logs aren't spammed
- // by awaitData queries that exceed slowms due to blocking on the CappedInsertNotifier.
- curOp.setExpectedLatencyMs(durationCount<Milliseconds>(timeout));
+ curOp.resumeTimer();
// Reacquiring locks.
readLock.emplace(opCtx, nss);
@@ -507,6 +517,7 @@ std::string runQuery(OperationContext* opCtx,
const NamespaceString& nss,
Message& result) {
CurOp& curOp = *CurOp::get(opCtx);
+ curOp.ensureStarted();
uassert(ErrorCodes::InvalidNamespace,
str::stream() << "Invalid ns [" << nss.ns() << "]",