summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2017-06-05 19:33:37 -0400
committerDavid Storch <david.storch@10gen.com>2017-06-14 18:52:38 -0400
commita68f209e4ff495e805e4d1b12d6fc3381c56a771 (patch)
treef3828ef00f300c6d2d5965fe0d9f9ebb27180572 /src
parent2b2ea96ea4a926018dc02bfc1c33ebcfed8bc2a0 (diff)
downloadmongo-a68f209e4ff495e805e4d1b12d6fc3381c56a771.tar.gz
SERVER-29304 Exclude time spent blocking for awaitData from getMore latency reporting.
This change applies to various server diagnostics which include latency metrics: the slow query logs, the system.profile collection, top, global operation latency stats, and per-collection operation latency stats. The secs_running and microsecs_running fields reported in currentOp output, on the other hand, will include time spent blocking for awaitData. Furthermore, this change fixes the following: - getMore operations are no longer double-counted. - Lock acquisition times are now consistently included. This fixes a bug in which time spent acquiring locks was accidentally omitted in some cases.
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/assemble_response.cpp7
-rw-r--r--src/mongo/db/commands/find_and_modify.cpp2
-rw-r--r--src/mongo/db/commands/geo_near_cmd.cpp3
-rw-r--r--src/mongo/db/commands/getmore_cmd.cpp35
-rw-r--r--src/mongo/db/curop.cpp4
-rw-r--r--src/mongo/db/curop.h94
-rw-r--r--src/mongo/db/db_raii.cpp13
-rw-r--r--src/mongo/db/db_raii.h1
-rw-r--r--src/mongo/db/ops/write_ops_exec.cpp10
-rw-r--r--src/mongo/db/query/explain.cpp3
-rw-r--r--src/mongo/db/query/find.cpp39
11 files changed, 142 insertions, 69 deletions
diff --git a/src/mongo/db/assemble_response.cpp b/src/mongo/db/assemble_response.cpp
index 06ac6c090c6..2ecfa4093c3 100644
--- a/src/mongo/db/assemble_response.cpp
+++ b/src/mongo/db/assemble_response.cpp
@@ -493,12 +493,13 @@ mongo::DbResponse mongo::assembleResponse(OperationContext* opCtx, const Message
}
currentOp.ensureStarted();
currentOp.done();
- debug.executionTimeMicros = currentOp.totalTimeMicros();
+ debug.executionTimeMicros = durationCount<Microseconds>(currentOp.elapsedTimeExcludingPauses());
- logThresholdMs += currentOp.getExpectedLatencyMs();
Top::get(opCtx->getServiceContext())
.incrementGlobalLatencyStats(
- opCtx, currentOp.totalTimeMicros(), currentOp.getReadWriteType());
+ opCtx,
+ durationCount<Microseconds>(currentOp.elapsedTimeExcludingPauses()),
+ currentOp.getReadWriteType());
const bool shouldSample = serverGlobalParams.sampleRate == 1.0
? true
diff --git a/src/mongo/db/commands/find_and_modify.cpp b/src/mongo/db/commands/find_and_modify.cpp
index 01d5d70b584..202bbdbf8f6 100644
--- a/src/mongo/db/commands/find_and_modify.cpp
+++ b/src/mongo/db/commands/find_and_modify.cpp
@@ -209,7 +209,7 @@ void recordStatsForTopCommand(OperationContext* opCtx) {
curOp->getNS(),
curOp->getLogicalOp(),
Top::LockType::WriteLocked,
- curOp->elapsedMicros(),
+ durationCount<Microseconds>(curOp->elapsedTimeExcludingPauses()),
curOp->isCommand(),
curOp->getReadWriteType());
}
diff --git a/src/mongo/db/commands/geo_near_cmd.cpp b/src/mongo/db/commands/geo_near_cmd.cpp
index 3b56d5074e3..8f693e74680 100644
--- a/src/mongo/db/commands/geo_near_cmd.cpp
+++ b/src/mongo/db/commands/geo_near_cmd.cpp
@@ -320,7 +320,8 @@ public:
stats.append("avgDistance", totalDistance / results);
}
stats.append("maxDistance", farthestDist);
- stats.appendIntOrLL("time", curOp->elapsedMicros() / 1000);
+ stats.appendIntOrLL("time",
+ durationCount<Microseconds>(curOp->elapsedTimeExcludingPauses()));
stats.done();
collection->infoCache()->notifyOfQuery(opCtx, summary.indexesUsed);
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index 6cc3799d6a4..615caf264e5 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -168,21 +168,21 @@ public:
// Cursors come in one of two flavors:
// - Cursors owned by the collection cursor manager, such as those generated via the find
// command. For these cursors, we hold the appropriate collection lock for the duration of
- // the getMore using AutoGetCollectionForRead. This will automatically update the CurOp
- // object appropriately and record execution time via Top upon completion.
+ // the getMore using AutoGetCollectionForRead.
// - Cursors owned by the global cursor manager, such as those generated via the aggregate
// command. These cursors either hold no collection state or manage their collection state
- // internally, so we acquire no locks. In this case we use the AutoStatsTracker object to
- // update the CurOp object appropriately and record execution time via Top upon
- // completion.
+ // internally, so we acquire no locks.
//
- // Thus, only one of 'readLock' and 'statsTracker' will be populated as we populate
- // 'cursorManager'.
+ // While we only need to acquire locks in the case of a cursor which is *not* globally
+ // owned, we need to create an AutoStatsTracker in either case. This is responsible for
+ // updating statistics in CurOp and Top. We avoid using AutoGetCollectionForReadCommand
+ // because we may need to drop and reacquire locks when the cursor is awaitData, but we
+ // don't want to update the stats twice.
//
- // Note that we declare our locks before our ClientCursorPin, in order to ensure that
+ // Note that we acquire our locks before our ClientCursorPin, in order to ensure that
// the pin's destructor is called before the lock's destructor (if there is one) so that the
// cursor cleanup can occur under the lock.
- boost::optional<AutoGetCollectionForReadCommand> readLock;
+ boost::optional<AutoGetCollectionForRead> readLock;
boost::optional<AutoStatsTracker> statsTracker;
CursorManager* cursorManager;
@@ -204,6 +204,13 @@ public:
ChunkVersion::IGNORED());
readLock.emplace(opCtx, request.nss);
+ const int doNotChangeProfilingLevel = 0;
+ statsTracker.emplace(opCtx,
+ request.nss,
+ Top::LockType::ReadLocked,
+ readLock->getDb() ? readLock->getDb()->getProfilingLevel()
+ : doNotChangeProfilingLevel);
+
Collection* collection = readLock->getCollection();
if (!collection) {
return appendCommandStatus(result,
@@ -368,15 +375,13 @@ public:
exec->saveState();
readLock.reset();
- // Block waiting for data.
+ // Block waiting for data. Time spent blocking is not counted towards the total
+ // operation latency.
+ curOp->pauseTimer();
const auto timeout = opCtx->getRemainingMaxTimeMicros();
notifier->wait(notifierVersion, timeout);
notifier.reset();
-
- // Set expected latency to match wait time. This makes sure the logs aren't spammed
- // by awaitData queries that exceed slowms due to blocking on the
- // CappedInsertNotifier.
- curOp->setExpectedLatencyMs(durationCount<Milliseconds>(timeout));
+ curOp->resumeTimer();
readLock.emplace(opCtx, request.nss);
exec->restoreState();
diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp
index 1894aaa5fef..451fa96132b 100644
--- a/src/mongo/db/curop.cpp
+++ b/src/mongo/db/curop.cpp
@@ -349,8 +349,8 @@ void appendAsObjOrString(StringData name,
void CurOp::reportState(BSONObjBuilder* builder) {
if (_start) {
- builder->append("secs_running", elapsedSeconds());
- builder->append("microsecs_running", static_cast<long long int>(elapsedMicros()));
+ builder->append("secs_running", durationCount<Seconds>(elapsedTimeTotal()));
+ builder->append("microsecs_running", durationCount<Microseconds>(elapsedTimeTotal()));
}
builder->append("op", logicalOpToString(_logicalOp));
diff --git a/src/mongo/db/curop.h b/src/mongo/db/curop.h
index c14af5e731f..bb1f82b1a5d 100644
--- a/src/mongo/db/curop.h
+++ b/src/mongo/db/curop.h
@@ -237,8 +237,7 @@ public:
if (!shouldSample || _dbprofile <= 0)
return false;
- long long opMicros = isDone() ? totalTimeMicros() : elapsedMicros();
- return opMicros >= serverGlobalParams.slowMS * 1000LL;
+ return elapsedTimeExcludingPauses() >= Milliseconds{serverGlobalParams.slowMS};
}
/**
@@ -292,17 +291,67 @@ public:
return _end > 0;
}
- long long totalTimeMicros() {
- massert(12601, "CurOp not marked done yet", _end);
- return _end - startTime();
+ /**
+ * Stops the operation latency timer from "ticking". Time spent paused is not included in the
+ * latencies returned by elapsedTimeExcludingPauses().
+ *
+ * Illegal to call if either the CurOp has not been started, or the CurOp is already in a paused
+ * state.
+ */
+ void pauseTimer() {
+ invariant(isStarted());
+ invariant(_lastPauseTime == 0);
+ _lastPauseTime = curTimeMicros64();
}
- long long elapsedMicros() {
- return curTimeMicros64() - startTime();
+ /**
+ * Starts the operation latency timer "ticking" again. Illegal to call if the CurOp has not been
+ * started and then subsequently paused.
+ */
+ void resumeTimer() {
+ invariant(isStarted());
+ invariant(_lastPauseTime > 0);
+ _totalPausedDuration +=
+ Microseconds{static_cast<long long>(curTimeMicros64()) - _lastPauseTime};
+ _lastPauseTime = 0;
}
- int elapsedSeconds() {
- return static_cast<int>(elapsedMicros() / (1000 * 1000));
+ /**
+ * If this op has been marked as done(), returns the wall clock duration between being marked as
+ * started with ensureStarted() and the call to done().
+ *
+ * Otherwise, returns the wall clock duration between the start time and now.
+ *
+ * If this op has not yet been started, returns 0.
+ */
+ Microseconds elapsedTimeTotal() {
+ if (!isStarted()) {
+ return Microseconds{0};
+ }
+
+ if (!_end) {
+ return Microseconds{static_cast<long long>(curTimeMicros64() - startTime())};
+ } else {
+ return Microseconds{static_cast<long long>(_end - startTime())};
+ }
+ }
+
+ /**
+ * Returns the total elapsed duration minus any time spent in a paused state. See
+ * elapsedTimeTotal() for the definition of the total duration and pause/resumeTimer() for
+ * details on pausing.
+ *
+ * If this op has not yet been started, returns 0.
+ *
+ * Illegal to call while the timer is paused.
+ */
+ Microseconds elapsedTimeExcludingPauses() {
+ invariant(!_lastPauseTime);
+ if (!isStarted()) {
+ return Microseconds{0};
+ }
+
+ return elapsedTimeTotal() - _totalPausedDuration;
}
/**
@@ -377,17 +426,6 @@ public:
}
/**
- * Access to _expectedLatencyMs is not synchronized, so it is illegal for threads other than the
- * one executing the operation to call getExpectedLatencyMs() and setExpectedLatencyMs().
- */
- long long getExpectedLatencyMs() const {
- return _expectedLatencyMs;
- }
- void setExpectedLatencyMs(long long latency) {
- _expectedLatencyMs = latency;
- }
-
- /**
* this should be used very sparingly
* generally the Context should set this up
* but sometimes you want to do it ahead of time
@@ -416,9 +454,20 @@ private:
CurOpStack* _stack;
CurOp* _parent{nullptr};
Command* _command{nullptr};
+
+ // The time at which this CurOp instance was marked as started.
long long _start{0};
+
+ // The time at which this CurOp instance was marked as done.
long long _end{0};
+ // The time at which this CurOp instance had its timer paused, or 0 if the timer is not
+ // currently paused.
+ long long _lastPauseTime{0};
+
+ // The cumulative duration for which the timer has been paused.
+ Microseconds _totalPausedDuration{0};
+
// _networkOp represents the network-level op code: OP_QUERY, OP_GET_MORE, OP_COMMAND, etc.
NetworkOp _networkOp{opInvalid}; // only set this through setNetworkOp_inlock() to keep synced
// _logicalOp is the logical operation type, ie 'dbQuery' regardless of whether this is an
@@ -436,11 +485,6 @@ private:
ProgressMeter _progressMeter;
int _numYields{0};
- // this is how much "extra" time a query might take
- // a writebacklisten for example will block for 30s
- // so this should be 30000 in that case
- long long _expectedLatencyMs{0};
-
std::string _planSummary;
};
diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp
index b9863e956bf..bff855fcc5b 100644
--- a/src/mongo/db/db_raii.cpp
+++ b/src/mongo/db/db_raii.cpp
@@ -38,9 +38,14 @@
#include "mongo/db/repl/replication_coordinator_global.h"
#include "mongo/db/s/collection_sharding_state.h"
#include "mongo/db/stats/top.h"
+#include "mongo/util/fail_point_service.h"
namespace mongo {
+namespace {
+MONGO_FP_DECLARE(setAutoGetCollectionWait);
+} // namespace
+
AutoGetDb::AutoGetDb(OperationContext* opCtx, StringData ns, LockMode mode)
: _dbLock(opCtx, ns, mode), _db(dbHolder().get(opCtx, ns)) {}
@@ -59,6 +64,12 @@ AutoGetCollection::AutoGetCollection(OperationContext* opCtx,
db->getViewCatalog()->lookup(opCtx, nss.ns()))
uasserted(ErrorCodes::CommandNotSupportedOnView,
str::stream() << "Namespace " << nss.ns() << " is a view, not a collection");
+
+ // Wait for a configured amount of time after acquiring locks if the failpoint is enabled.
+ MONGO_FAIL_POINT_BLOCK(setAutoGetCollectionWait, customWait) {
+ const BSONObj& data = customWait.getData();
+ sleepFor(Milliseconds(data["waitForMillis"].numberInt()));
+ }
}
AutoGetCollectionOrView::AutoGetCollectionOrView(OperationContext* opCtx,
@@ -107,7 +118,7 @@ AutoStatsTracker::~AutoStatsTracker() {
curOp->getNS(),
curOp->getLogicalOp(),
_lockType,
- _timer.micros(),
+ durationCount<Microseconds>(curOp->elapsedTimeExcludingPauses()),
curOp->isCommand(),
curOp->getReadWriteType());
}
diff --git a/src/mongo/db/db_raii.h b/src/mongo/db/db_raii.h
index ce9b1b200ff..641b64e7988 100644
--- a/src/mongo/db/db_raii.h
+++ b/src/mongo/db/db_raii.h
@@ -238,7 +238,6 @@ public:
~AutoStatsTracker();
private:
- const Timer _timer;
OperationContext* _opCtx;
Top::LockType _lockType;
};
diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp
index 757290ef1cc..f8b20c2d10c 100644
--- a/src/mongo/db/ops/write_ops_exec.cpp
+++ b/src/mongo/db/ops/write_ops_exec.cpp
@@ -82,7 +82,8 @@ MONGO_FP_DECLARE(failAllRemoves);
void finishCurOp(OperationContext* opCtx, CurOp* curOp) {
try {
curOp->done();
- long long executionTimeMicros = curOp->totalTimeMicros();
+ long long executionTimeMicros =
+ durationCount<Microseconds>(curOp->elapsedTimeExcludingPauses());
curOp->debug().executionTimeMicros = executionTimeMicros;
recordCurOpMetrics(opCtx);
@@ -91,7 +92,7 @@ void finishCurOp(OperationContext* opCtx, CurOp* curOp) {
curOp->getNS(),
curOp->getLogicalOp(),
Top::LockType::WriteLocked,
- curOp->totalTimeMicros(),
+ durationCount<Microseconds>(curOp->elapsedTimeExcludingPauses()),
curOp->isCommand(),
curOp->getReadWriteType());
@@ -102,8 +103,7 @@ void finishCurOp(OperationContext* opCtx, CurOp* curOp) {
const bool logAll = logger::globalLogDomain()->shouldLog(logger::LogComponent::kCommand,
logger::LogSeverity::Debug(1));
- const bool logSlow = executionTimeMicros >
- (serverGlobalParams.slowMS + curOp->getExpectedLatencyMs()) * 1000LL;
+ const bool logSlow = executionTimeMicros > (serverGlobalParams.slowMS * 1000LL);
const bool shouldSample = serverGlobalParams.sampleRate == 1.0
? true
@@ -405,7 +405,7 @@ WriteResult performInserts(OperationContext* opCtx, const InsertOp& wholeOp) {
wholeOp.ns.ns(),
LogicalOp::opInsert,
Top::LockType::WriteLocked,
- curOp.totalTimeMicros(),
+ durationCount<Microseconds>(curOp.elapsedTimeExcludingPauses()),
curOp.isCommand(),
curOp.getReadWriteType());
diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp
index b243336655b..b3784552f6d 100644
--- a/src/mongo/db/query/explain.cpp
+++ b/src/mongo/db/query/explain.cpp
@@ -763,7 +763,8 @@ void Explain::explainStages(PlanExecutor* exec,
// Generate exec stats BSON for the winning plan.
OperationContext* opCtx = exec->getOpCtx();
- long long totalTimeMillis = CurOp::get(opCtx)->elapsedMicros() / 1000;
+ long long totalTimeMillis =
+ durationCount<Milliseconds>(CurOp::get(opCtx)->elapsedTimeTotal());
generateExecStats(winningStats.get(), verbosity, &execBob, totalTimeMillis);
// Also generate exec stats for all plans, if the verbosity level is high enough.
diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp
index e03b3318b1a..56bb92cfb77 100644
--- a/src/mongo/db/query/find.cpp
+++ b/src/mongo/db/query/find.cpp
@@ -235,6 +235,7 @@ Message getMore(OperationContext* opCtx,
invariant(ntoreturn >= 0);
CurOp& curOp = *CurOp::get(opCtx);
+ curOp.ensureStarted();
// For testing, we may want to fail if we receive a getmore.
if (MONGO_FAIL_POINT(failReceivedGetmore)) {
@@ -247,18 +248,22 @@ Message getMore(OperationContext* opCtx,
// Cursors come in one of two flavors:
// - Cursors owned by the collection cursor manager, such as those generated via the find
- // command. For these cursors, we hold the appropriate collection lock for the duration of
- // the getMore using AutoGetCollectionForRead. This will automatically update the CurOp
- // object appropriately and record execution time via Top upon completion.
+ // command. For these cursors, we hold the appropriate collection lock for the duration of the
+ // getMore using AutoGetCollectionForRead.
// - Cursors owned by the global cursor manager, such as those generated via the aggregate
// command. These cursors either hold no collection state or manage their collection state
- // internally, so we acquire no locks. In this case we use the AutoStatsTracker object to
- // update the CurOp object appropriately and record execution time via Top upon
- // completion.
+ // internally, so we acquire no locks.
//
- // Thus, only one of 'readLock' and 'statsTracker' will be populated as we populate
- // 'cursorManager'.
- boost::optional<AutoGetCollectionForReadCommand> readLock;
+ // While we only need to acquire locks in the case of a cursor which is *not* globally owned, we
+ // need to create an AutoStatsTracker in either case. This is responsible for updating
+ // statistics in CurOp and Top. We avoid using AutoGetCollectionForReadCommand because we may
+ // need to drop and reacquire locks when the cursor is awaitData, but we don't want to update
+ // the stats twice.
+ //
+ // Note that we acquire our locks before our ClientCursorPin, in order to ensure that the pin's
+ // destructor is called before the lock's destructor (if there is one) so that the cursor
+ // cleanup can occur under the lock.
+ boost::optional<AutoGetCollectionForRead> readLock;
boost::optional<AutoStatsTracker> statsTracker;
CursorManager* cursorManager;
@@ -283,6 +288,12 @@ Message getMore(OperationContext* opCtx,
}
} else {
readLock.emplace(opCtx, nss);
+ const int doNotChangeProfilingLevel = 0;
+ statsTracker.emplace(opCtx,
+ nss,
+ Top::LockType::ReadLocked,
+ readLock->getDb() ? readLock->getDb()->getProfilingLevel()
+ : doNotChangeProfilingLevel);
Collection* collection = readLock->getCollection();
uassert(
ErrorCodes::OperationFailed, "collection dropped between getMore calls", collection);
@@ -416,14 +427,13 @@ Message getMore(OperationContext* opCtx,
exec->saveState();
readLock.reset();
- // Block waiting for data for up to 1 second.
+ // Block waiting for data for up to 1 second. Time spent blocking is not counted towards
+ // the total operation latency.
+ curOp.pauseTimer();
Seconds timeout(1);
notifier->wait(notifierVersion, timeout);
notifier.reset();
-
- // Set expected latency to match wait time. This makes sure the logs aren't spammed
- // by awaitData queries that exceed slowms due to blocking on the CappedInsertNotifier.
- curOp.setExpectedLatencyMs(durationCount<Milliseconds>(timeout));
+ curOp.resumeTimer();
// Reacquiring locks.
readLock.emplace(opCtx, nss);
@@ -507,6 +517,7 @@ std::string runQuery(OperationContext* opCtx,
const NamespaceString& nss,
Message& result) {
CurOp& curOp = *CurOp::get(opCtx);
+ curOp.ensureStarted();
uassert(ErrorCodes::InvalidNamespace,
str::stream() << "Invalid ns [" << nss.ns() << "]",