summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXuerui Fa <xuerui.fa@mongodb.com>2020-01-27 18:17:01 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-02-05 22:41:37 +0000
commitb0c5c0baa85fba563c80ee416cecc22e9ffbf53a (patch)
tree7b58a9cfa80c94b384b97333708e8a43923b1b33
parent84940076a90c3938decb6a28219f7102307404bd (diff)
downloadmongo-b0c5c0baa85fba563c80ee416cecc22e9ffbf53a.tar.gz
SERVER-43069: Condition logging for slow ops on sample rate
create mode 100644 src/mongo/util/log_with_sampling.h create mode 100644 src/mongo/util/log_with_sampling_test.cpp
-rw-r--r--jstests/replsets/log_secondary_oplog_application.js79
-rw-r--r--src/mongo/db/curop.cpp18
-rw-r--r--src/mongo/db/repl/oplog_applier_impl.cpp19
-rw-r--r--src/mongo/db/transaction_participant.cpp13
-rw-r--r--src/mongo/db/transaction_participant_test.cpp126
-rw-r--r--src/mongo/s/transaction_router.cpp9
-rw-r--r--src/mongo/s/transaction_router_test.cpp39
-rw-r--r--src/mongo/util/SConscript1
-rw-r--r--src/mongo/util/log_with_sampling.h63
-rw-r--r--src/mongo/util/log_with_sampling_test.cpp171
10 files changed, 503 insertions, 35 deletions
diff --git a/jstests/replsets/log_secondary_oplog_application.js b/jstests/replsets/log_secondary_oplog_application.js
index 08075bc22d5..49b1a37cb94 100644
--- a/jstests/replsets/log_secondary_oplog_application.js
+++ b/jstests/replsets/log_secondary_oplog_application.js
@@ -9,6 +9,9 @@
(function() {
"use strict";
+load("jstests/replsets/rslib.js");
+load("jstests/libs/fail_point_util.js");
+
let name = "log_secondary_oplog_application";
let rst = ReplSetTest({name: name, nodes: 2});
rst.startSet();
@@ -44,7 +47,7 @@ assert.throws(function() {
});
/**
- * Part 2: Issue a slow op and make sure that we *do* log it.
+ * Part 2: Issue a slow op and make sure that we *do* log it when the sample rate is set to 1.
* We use a failpoint in applyOplogEntryOrGroupedInserts which blocks after we read the time at the
* start of the application of the op, and we wait there to simulate slowness.
*/
@@ -53,29 +56,83 @@ assert.throws(function() {
assert.commandWorked(primary.getDB(name).createCollection("slowOp"));
rst.awaitReplication();
-// Set "slowMS" to a low value (in milliseconds).
-assert.commandWorked(secondary.getDB(name).setProfilingLevel(1, 20));
+// Set "sampleRate" to 1 and "slowMS" to a low value (in milliseconds).
+assert.commandWorked(secondary.getDB(name).setProfilingLevel(1, {sampleRate: 1, slowms: 20}));
// Hang right after taking note of the start time of the application.
-assert.commandWorked(secondary.adminCommand(
- {configureFailPoint: "hangAfterRecordingOpApplicationStartTime", mode: "alwaysOn"}));
+let hangAfterRecordingOpApplicationsStartTimeFailPoint =
+ configureFailPoint(secondary, "hangAfterRecordingOpApplicationStartTime");
// Issue a write and make sure we've hit the failpoint before moving on.
assert.commandWorked(primary.getDB(name)["slowOp"].insert({"slow": "sloth"}));
-checkLog.contains(
- secondary,
- "applyOplogEntryOrGroupedInserts - fail point hangAfterRecordingOpApplicationStartTime enabled");
+hangAfterRecordingOpApplicationsStartTimeFailPoint.wait();
// Wait for an amount of time safely above the "slowMS" we set.
sleep(0.5 * 1000);
-// Disable the failpoint so the op finish can applying.
-assert.commandWorked(secondary.adminCommand(
- {configureFailPoint: "hangAfterRecordingOpApplicationStartTime", mode: "off"}));
+// Disable the hangAfterRecordingOpApplicationsStartTime failpoint so the op finish can applying.
+hangAfterRecordingOpApplicationsStartTimeFailPoint.off();
// Make sure we log that insert op.
rst.awaitReplication();
checkLog.contains(secondary, "sloth");
+/**
+ * Part 3: Issue a slow op and make sure that we do *not* log it when the sample rate is set to 0.
+ */
+
+// Set "sampleRate" to 0 and "slowMS" to a low value (in milliseconds).
+assert.commandWorked(secondary.getDB(name).setProfilingLevel(1, {sampleRate: 0, slowms: 20}));
+
+// Hang right after taking note of the start time of the application.
+hangAfterRecordingOpApplicationsStartTimeFailPoint =
+ configureFailPoint(secondary, "hangAfterRecordingOpApplicationStartTime");
+
+// Issue a write and make sure we've hit the failpoint before moving on.
+assert.commandWorked(primary.getDB(name)["slowOp"].insert({"slow": "turtle"}));
+hangAfterRecordingOpApplicationsStartTimeFailPoint.wait();
+
+// Wait for an amount of time safely above the "slowMS" we set.
+sleep(0.5 * 1000);
+
+// Disable the hangAfterRecordingOpApplicationsStartTime failpoint so the op finish can applying.
+hangAfterRecordingOpApplicationsStartTimeFailPoint.off();
+
+// Ensure that the write was replicated.
+rst.awaitReplication();
+
+// The op should not have been logged.
+assert.throws(function() {
+ checkLog.contains(secondary, "turtle", 1 * 1000);
+});
+
+/**
+ * Part 4: Issue a slow op and verify that we log it when the sample rate is 0 but log verbosity
+ * is set to 1.
+ */
+
+// Set the log verbosity for the replication component to 1.
+setLogVerbosity(rst.nodes, {"replication": {"verbosity": 1}});
+
+// Hang right after taking note of the start time of the application.
+hangAfterRecordingOpApplicationsStartTimeFailPoint =
+ configureFailPoint(secondary, "hangAfterRecordingOpApplicationStartTime");
+
+// Issue a write and make sure we've hit the failpoint before moving on.
+assert.commandWorked(primary.getDB(name)["slowOp"].insert({"slow": "snail"}));
+hangAfterRecordingOpApplicationsStartTimeFailPoint.wait();
+
+// Wait for an amount of time safely above the "slowMS" we set.
+sleep(0.5 * 1000);
+
+// Disable the hangAfterRecordingOpApplicationsStartTime failpoint so the op finish can applying.
+hangAfterRecordingOpApplicationsStartTimeFailPoint.off();
+
+// Ensure that the write was replicated.
+rst.awaitReplication();
+
+// Make sure we log that insert op.
+checkLog.contains(secondary, "snail");
+
rst.stopSet();
})();
diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp
index de89c4b5789..1ac7a4c71c1 100644
--- a/src/mongo/db/curop.cpp
+++ b/src/mongo/db/curop.cpp
@@ -53,6 +53,7 @@
#include "mongo/rpc/metadata/impersonated_user_metadata.h"
#include "mongo/util/hex.h"
#include "mongo/util/log.h"
+#include "mongo/util/log_with_sampling.h"
#include "mongo/util/net/socket_utils.h"
#include "mongo/util/str.h"
#include <mongo/db/stats/timer_stats.h>
@@ -421,12 +422,8 @@ bool CurOp::completeAndLogOperation(OperationContext* opCtx,
boost::optional<size_t> responseLength,
boost::optional<long long> slowMsOverride,
bool forceLog) {
- // Log the operation if it is eligible according to the current slowMS and sampleRate settings.
- const bool shouldLogOp = (forceLog || shouldLog(component, logger::LogSeverity::Debug(1)));
const long long slowMs = slowMsOverride.value_or(serverGlobalParams.slowMS);
- const auto client = opCtx->getClient();
-
// Record the size of the response returned to the client, if applicable.
if (responseLength) {
_debug.responseLength = *responseLength;
@@ -436,14 +433,19 @@ bool CurOp::completeAndLogOperation(OperationContext* opCtx,
_end = curTimeMicros64();
_debug.executionTimeMicros = durationCount<Microseconds>(elapsedTimeExcludingPauses());
+ const auto executionTimeMillis = _debug.executionTimeMicros / 1000;
+
if (_debug.isReplOplogFetching) {
- oplogGetMoreStats.recordMillis(_debug.executionTimeMicros / 1000);
+ oplogGetMoreStats.recordMillis(executionTimeMillis);
}
- const bool shouldSample =
- client->getPrng().nextCanonicalDouble() < serverGlobalParams.sampleRate;
+ bool shouldLogSlowOp, shouldSample;
+
+ // Log the operation if it is eligible according to the current slowMS and sampleRate settings.
+ std::tie(shouldLogSlowOp, shouldSample) = shouldLogSlowOpWithSampling(
+ opCtx, component, Milliseconds(executionTimeMillis), Milliseconds(slowMs));
- if (shouldLogOp || (shouldSample && _debug.executionTimeMicros > slowMs * 1000LL)) {
+ if (forceLog || shouldLogSlowOp) {
auto lockerInfo = opCtx->lockState()->getLockerInfo(_lockStatsBase);
if (_debug.storageStats == nullptr && opCtx->lockState()->wasGlobalLockTaken() &&
opCtx->getServiceContext()->getStorageEngine()) {
diff --git a/src/mongo/db/repl/oplog_applier_impl.cpp b/src/mongo/db/repl/oplog_applier_impl.cpp
index 8b7d7ba3c41..1ac70f2a05e 100644
--- a/src/mongo/db/repl/oplog_applier_impl.cpp
+++ b/src/mongo/db/repl/oplog_applier_impl.cpp
@@ -48,6 +48,7 @@
#include "mongo/platform/basic.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/log.h"
+#include "mongo/util/log_with_sampling.h"
#include "third_party/murmurhash3/MurmurHash3.h"
namespace mongo {
@@ -96,17 +97,21 @@ NamespaceStringOrUUID getNsOrUUID(const NamespaceString& nss, const OplogEntry&
* Used for logging a report of ops that take longer than "slowMS" to apply. This is called
* right before returning from applyOplogEntryOrGroupedInserts, and it returns the same status.
*/
-Status finishAndLogApply(ClockSource* clockSource,
+Status finishAndLogApply(OperationContext* opCtx,
+ ClockSource* clockSource,
Status finalStatus,
Date_t applyStartTime,
const OplogEntryOrGroupedInserts& entryOrGroupedInserts) {
if (finalStatus.isOK()) {
auto applyEndTime = clockSource->now();
- auto diffMS = durationCount<Milliseconds>(applyEndTime - applyStartTime);
+ auto opDuration = durationCount<Milliseconds>(applyEndTime - applyStartTime);
- // This op was slow to apply, so we should log a report of it.
- if (diffMS > serverGlobalParams.slowMS) {
+ if (shouldLogSlowOpWithSampling(opCtx,
+ MONGO_LOG_DEFAULT_COMPONENT,
+ Milliseconds(opDuration),
+ Milliseconds(serverGlobalParams.slowMS))
+ .first) {
StringBuilder s;
s << "applied op: ";
@@ -118,7 +123,7 @@ Status finishAndLogApply(ClockSource* clockSource,
}
s << redact(entryOrGroupedInserts.toBSON());
- s << ", took " << diffMS << "ms";
+ s << ", took " << opDuration << "ms";
log() << s.str();
}
@@ -971,7 +976,7 @@ Status applyOplogEntryOrGroupedInserts(OperationContext* opCtx,
throw;
}
});
- return finishAndLogApply(clockSource, status, applyStartTime, entryOrGroupedInserts);
+ return finishAndLogApply(opCtx, clockSource, status, applyStartTime, entryOrGroupedInserts);
} else if (opType == OpTypeEnum::kCommand) {
auto status =
writeConflictRetry(opCtx, "applyOplogEntryOrGroupedInserts_command", nss.ns(), [&] {
@@ -980,7 +985,7 @@ Status applyOplogEntryOrGroupedInserts(OperationContext* opCtx,
incrementOpsAppliedStats();
return status;
});
- return finishAndLogApply(clockSource, status, applyStartTime, entryOrGroupedInserts);
+ return finishAndLogApply(opCtx, clockSource, status, applyStartTime, entryOrGroupedInserts);
}
MONGO_UNREACHABLE;
diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp
index 4dd7905add3..7687b9a5d86 100644
--- a/src/mongo/db/transaction_participant.cpp
+++ b/src/mongo/db/transaction_participant.cpp
@@ -65,6 +65,7 @@
#include "mongo/db/transaction_participant_gen.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/log.h"
+#include "mongo/util/log_with_sampling.h"
#include "mongo/util/net/socket_utils.h"
namespace mongo {
@@ -1884,11 +1885,15 @@ void TransactionParticipant::Participant::_logSlowTransaction(
// Only log multi-document transactions.
if (!o().txnState.isInRetryableWriteMode()) {
const auto tickSource = opCtx->getServiceContext()->getTickSource();
- // Log the transaction if log message verbosity for transaction component is >= 1 or its
- // duration is longer than the slowMS command threshold.
- if (shouldLog(logger::LogComponent::kTransaction, logger::LogSeverity::Debug(1)) ||
+ const auto opDuration = duration_cast<Milliseconds>(
o().transactionMetricsObserver.getSingleTransactionStats().getDuration(
- tickSource, tickSource->getTicks()) > Milliseconds(serverGlobalParams.slowMS)) {
+ tickSource, tickSource->getTicks()));
+
+ if (shouldLogSlowOpWithSampling(opCtx,
+ logger::LogComponent::kTransaction,
+ opDuration,
+ Milliseconds(serverGlobalParams.slowMS))
+ .first) {
log(logger::LogComponent::kTransaction)
<< "transaction "
<< _transactionInfoForLog(opCtx, lockStats, terminationCause, readConcernArgs);
diff --git a/src/mongo/db/transaction_participant_test.cpp b/src/mongo/db/transaction_participant_test.cpp
index 3e6c30ceb23..33fd84b14ff 100644
--- a/src/mongo/db/transaction_participant_test.cpp
+++ b/src/mongo/db/transaction_participant_test.cpp
@@ -3280,7 +3280,19 @@ TEST_F(TransactionsMetricsTest, LogTransactionInfoAfterSlowCommit) {
auto operation = repl::OplogEntry::makeInsertOperation(kNss, _uuid, BSON("TestValue" << 0));
txnParticipant.addTransactionOperation(opCtx(), operation);
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
serverGlobalParams.slowMS = 10;
+ serverGlobalParams.sampleRate = 1;
+
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ // serverGlobalParams may have been modified prior to this test, so we set them back to
+ // their default values.
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
+
tickSource->advance(Microseconds(11 * 1000));
startCapturingLogMessages();
@@ -3314,7 +3326,18 @@ TEST_F(TransactionsMetricsTest, LogPreparedTransactionInfoAfterSlowCommit) {
const int metricValue = 1;
setupAdditiveMetrics(metricValue, opCtx());
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
serverGlobalParams.slowMS = 10;
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset the global parameters to their original values after this test exits.
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
+
tickSource->advance(Microseconds(11 * 1000));
txnParticipant.unstashTransactionResources(opCtx(), "commitTransaction");
@@ -3353,7 +3376,18 @@ TEST_F(TransactionsMetricsTest, LogTransactionInfoAfterSlowAbort) {
txnParticipant.unstashTransactionResources(opCtx(), "abortTransaction");
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
serverGlobalParams.slowMS = 10;
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset the global parameters to their original values after this test exits.
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
+
tickSource->advance(Microseconds(11 * 1000));
startCapturingLogMessages();
@@ -3397,8 +3431,20 @@ TEST_F(TransactionsMetricsTest, LogPreparedTransactionInfoAfterSlowAbort) {
txnParticipant.unstashTransactionResources(opCtx(), "abortTransaction");
txnParticipant.prepareTransaction(opCtx(), {});
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
serverGlobalParams.slowMS = 10;
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset the global parameters to their original values after this test exits.
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
+
tickSource->advance(Microseconds(11 * 1000));
+
auto prepareOpTime = txnParticipant.getPrepareOpTime();
startCapturingLogMessages();
@@ -3441,7 +3487,19 @@ TEST_F(TransactionsMetricsTest, LogTransactionInfoAfterExceptionInPrepare) {
setupAdditiveMetrics(metricValue, opCtx());
txnParticipant.unstashTransactionResources(opCtx(), "prepareTransaction");
+
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
serverGlobalParams.slowMS = 10;
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset the global parameters to their original values after this test exits.
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
+
tickSource->advance(Microseconds(11 * 1000));
_opObserver->onTransactionPrepareThrowsException = true;
@@ -3496,7 +3554,18 @@ TEST_F(TransactionsMetricsTest, LogTransactionInfoAfterSlowStashedAbort) {
ASSERT(txnResourceStashLocker);
const auto lockerInfo = txnResourceStashLocker->getLockerInfo(boost::none);
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
serverGlobalParams.slowMS = 10;
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset the global parameters to their original values after this test exits.
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
+
tickSource->advance(Microseconds(11 * 1000));
startCapturingLogMessages();
@@ -3507,13 +3576,54 @@ TEST_F(TransactionsMetricsTest, LogTransactionInfoAfterSlowStashedAbort) {
ASSERT_EQUALS(1, countTextFormatLogLinesContaining(expectedTransactionInfo));
}
+TEST_F(TransactionsMetricsTest, LogTransactionInfoZeroSampleRate) {
+ auto tickSource = initMockTickSource();
+
+ auto sessionCheckout = checkOutSession();
+
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
+ serverGlobalParams.slowMS = 10;
+ // Set the sample rate to 0 to never log this transaction.
+ serverGlobalParams.sampleRate = 0;
+
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ txnParticipant.unstashTransactionResources(opCtx(), "commitTransaction");
+
+ // Reset the global parameters to their original values after this test exits.
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
+
+ tickSource->advance(Microseconds(11 * 1000));
+
+ startCapturingLogMessages();
+ txnParticipant.commitUnpreparedTransaction(opCtx());
+ stopCapturingLogMessages();
+
+ // Test that the transaction is not logged.
+ ASSERT_EQUALS(0, countTextFormatLogLinesContaining("transaction parameters"));
+}
+
TEST_F(TransactionsMetricsTest, LogTransactionInfoVerbosityInfo) {
auto sessionCheckout = checkOutSession();
auto txnParticipant = TransactionParticipant::get(opCtx());
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
// Set a high slow operation threshold to avoid the transaction being logged as slow.
serverGlobalParams.slowMS = 10000;
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset the global parameters to their original values after this test exits.
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
// Set verbosity level of transaction components to info.
setMinimumLoggedSeverity(logger::LogComponent::kTransaction, logger::LogSeverity::Info());
@@ -3533,14 +3643,24 @@ TEST_F(TransactionsMetricsTest, LogTransactionInfoVerbosityDebug) {
auto txnParticipant = TransactionParticipant::get(opCtx());
- // Set a high slow operation threshold to avoid the transaction being logged as slow.
- serverGlobalParams.slowMS = 10000;
-
// Set verbosity level of transaction components to debug.
setMinimumLoggedSeverity(logger::LogComponent::kTransaction, logger::LogSeverity::Debug(1));
txnParticipant.unstashTransactionResources(opCtx(), "commitTransaction");
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
+ // Set a high slow operation threshold to avoid the transaction being logged as slow.
+ serverGlobalParams.slowMS = 10000;
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset the global parameters to their original values after this test exits.
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
+
startCapturingLogMessages();
txnParticipant.commitUnpreparedTransaction(opCtx());
stopCapturingLogMessages();
diff --git a/src/mongo/s/transaction_router.cpp b/src/mongo/s/transaction_router.cpp
index 6fa39d62e90..44796119e1a 100644
--- a/src/mongo/s/transaction_router.cpp
+++ b/src/mongo/s/transaction_router.cpp
@@ -53,6 +53,7 @@
#include "mongo/util/assert_util.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/log.h"
+#include "mongo/util/log_with_sampling.h"
#include "mongo/util/net/socket_utils.h"
namespace mongo {
@@ -1427,8 +1428,12 @@ void TransactionRouter::Router::_endTransactionTrackingIfNecessary(
}
const auto& timingStats = o().metricsTracker->getTimingStats();
- if (shouldLog(logger::LogComponent::kTransaction, logger::LogSeverity::Debug(1)) ||
- timingStats.getDuration(tickSource, curTicks) > Milliseconds(serverGlobalParams.slowMS)) {
+ const auto opDuration =
+ duration_cast<Milliseconds>(timingStats.getDuration(tickSource, curTicks));
+
+ if (shouldLogSlowOpWithSampling(
+ opCtx, MONGO_LOG_DEFAULT_COMPONENT, opDuration, Milliseconds(serverGlobalParams.slowMS))
+ .first) {
_logSlowTransaction(opCtx, terminationCause);
}
}
diff --git a/src/mongo/s/transaction_router_test.cpp b/src/mongo/s/transaction_router_test.cpp
index dd2e86df0bc..30564c94f6d 100644
--- a/src/mongo/s/transaction_router_test.cpp
+++ b/src/mongo/s/transaction_router_test.cpp
@@ -3098,7 +3098,17 @@ protected:
//
TEST_F(TransactionRouterMetricsTest, DoesNotLogTransactionsUnderSlowMSThreshold) {
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
serverGlobalParams.slowMS = 100;
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset the global parameters to their original values after this test exits.
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
beginTxnWithDefaultTxnNumber();
tickSource()->advance(Milliseconds(99));
@@ -3107,7 +3117,17 @@ TEST_F(TransactionRouterMetricsTest, DoesNotLogTransactionsUnderSlowMSThreshold)
}
TEST_F(TransactionRouterMetricsTest, LogsTransactionsOverSlowMSThreshold) {
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
serverGlobalParams.slowMS = 100;
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset the global parameters to their original values after this test exits.
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
beginTxnWithDefaultTxnNumber();
tickSource()->advance(Milliseconds(101));
@@ -3115,6 +3135,25 @@ TEST_F(TransactionRouterMetricsTest, LogsTransactionsOverSlowMSThreshold) {
assertPrintedExactlyOneSlowLogLine();
}
+TEST_F(TransactionRouterMetricsTest, DoesNotLogTransactionsWithSampleRateZero) {
+ const auto originalSlowMS = serverGlobalParams.slowMS;
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+
+ serverGlobalParams.slowMS = 100;
+ serverGlobalParams.sampleRate = 0;
+
+ // Reset the global parameters to their original values after this test exits.
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] {
+ serverGlobalParams.slowMS = originalSlowMS;
+ serverGlobalParams.sampleRate = originalSampleRate;
+ });
+
+ beginTxnWithDefaultTxnNumber();
+ tickSource()->advance(Milliseconds(101));
+ runCommit(kDummyOkRes);
+ assertDidNotPrintSlowLogLine();
+}
+
TEST_F(TransactionRouterMetricsTest, OnlyLogSlowTransactionsOnce) {
beginSlowTxnWithDefaultTxnNumber();
diff --git a/src/mongo/util/SConscript b/src/mongo/util/SConscript
index 1e810450ae8..0cf1b69be67 100644
--- a/src/mongo/util/SConscript
+++ b/src/mongo/util/SConscript
@@ -575,6 +575,7 @@ icuEnv.CppUnitTest(
'itoa_test.cpp',
'latch_analyzer_test.cpp',
'lockable_adapter_test.cpp',
+ 'log_with_sampling_test.cpp',
'lru_cache_test.cpp',
'md5_test.cpp',
'md5main.cpp',
diff --git a/src/mongo/util/log_with_sampling.h b/src/mongo/util/log_with_sampling.h
new file mode 100644
index 00000000000..e0cffe2fe29
--- /dev/null
+++ b/src/mongo/util/log_with_sampling.h
@@ -0,0 +1,63 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <utility>
+
+#include "mongo/logger/log_component.h"
+#include "mongo/logger/logger.h"
+#include "mongo/logv2/log_component.h"
+
+namespace mongo {
+
+/*
+ * Return a pair of booleans. The first value is if we should log the operation and the second value
+ * is if we should sample this operation for profiling.
+ */
+inline std::pair<bool, bool> shouldLogSlowOpWithSampling(OperationContext* opCtx,
+ logger::LogComponent logComponent,
+ Milliseconds opDuration,
+ Milliseconds slowMS) {
+ // Log the operation if log message verbosity for operation component is >= 1.
+ const bool componentHasTargetLogVerbosity =
+ shouldLog(logComponent, logger::LogSeverity::Debug(1));
+
+ const auto client = opCtx->getClient();
+ const bool shouldSample =
+ client->getPrng().nextCanonicalDouble() < serverGlobalParams.sampleRate;
+
+ // Log the transaction if we should sample and its duration is longer than
+ // the slowMS command threshold.
+ const bool shouldLogSlowOp = shouldSample && opDuration > slowMS;
+
+ return std::pair<bool, bool>(componentHasTargetLogVerbosity || shouldLogSlowOp, shouldSample);
+}
+
+} // namespace mongo
diff --git a/src/mongo/util/log_with_sampling_test.cpp b/src/mongo/util/log_with_sampling_test.cpp
new file mode 100644
index 00000000000..f0dc97be046
--- /dev/null
+++ b/src/mongo/util/log_with_sampling_test.cpp
@@ -0,0 +1,171 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/service_context_test_fixture.h"
+#include "mongo/util/log_global_settings.h"
+#include "mongo/util/log_with_sampling.h"
+
+namespace mongo {
+namespace {
+
+
+TEST(LogWithSamplingTest, ShouldLogCorrectlyWhenSampleRateIsSet) {
+ const auto serviceContext = ServiceContext::make();
+ const auto client = serviceContext->makeClient("log_with_sampling_test");
+ const auto opCtx = client->makeOperationContext();
+
+ const auto slowOpThresholdMS = Milliseconds(10);
+
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+ // Set sample rate to always profile a slow operation.
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset sample rate to the original value after the test exits.
+ ON_BLOCK_EXIT([originalSampleRate] { serverGlobalParams.sampleRate = originalSampleRate; });
+
+ // Set the op duration to be greater than slowOpThreshold so that the op is considered slow.
+ const auto slowOpDurationMS = Milliseconds(11);
+ // Set verbosity level of operation component to info so that it doesn't log due to the log
+ // level.
+ setMinimumLoggedSeverity(MONGO_LOG_DEFAULT_COMPONENT, logger::LogSeverity::Info());
+
+ bool shouldLogSlowOp, shouldSample;
+ std::tie(shouldLogSlowOp, shouldSample) = shouldLogSlowOpWithSampling(
+ opCtx.get(), MONGO_LOG_DEFAULT_COMPONENT, slowOpDurationMS, slowOpThresholdMS);
+
+ // Verify that shouldLogSlowOp is true when the sampleRate is 1.
+ ASSERT_TRUE(shouldLogSlowOp);
+
+ // Verify that shouldSample is true when the sampleRate is 1.
+ ASSERT_TRUE(shouldSample);
+
+ // Set sample rate to never profile a slow operation.
+ serverGlobalParams.sampleRate = 0;
+
+ std::tie(shouldLogSlowOp, shouldSample) = shouldLogSlowOpWithSampling(
+ opCtx.get(), MONGO_LOG_DEFAULT_COMPONENT, slowOpDurationMS, slowOpThresholdMS);
+
+ // Verify that shouldLogSlowOp is false when the sampleRate is 0.
+ ASSERT_FALSE(shouldLogSlowOp);
+
+ // Verify that shouldSample is false when the sampleRate is 0.
+ ASSERT_FALSE(shouldSample);
+}
+
+TEST(LogWithSamplingTest, ShouldAlwaysLogsWithVerbosityLevelDebug) {
+ const auto serviceContext = ServiceContext::make();
+ const auto client = serviceContext->makeClient("log_with_sampling_test");
+ const auto opCtx = client->makeOperationContext();
+
+ const auto slowOpThresholdMS = Milliseconds(10);
+
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+ // Set sample rate to always profile a slow operation.
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset sample rate to the original value after the test exits.
+ ON_BLOCK_EXIT([originalSampleRate] { serverGlobalParams.sampleRate = originalSampleRate; });
+
+ // Set the op duration to be greater than slowMS so that the op is considered slow.
+ const auto slowOpDurationMS = Milliseconds(11);
+ // Set verbosity level of operation component to debug so that it should always log.
+ setMinimumLoggedSeverity(MONGO_LOG_DEFAULT_COMPONENT, logger::LogSeverity::Debug(1));
+
+ bool shouldLogSlowOp, shouldSample;
+ std::tie(shouldLogSlowOp, shouldSample) = shouldLogSlowOpWithSampling(
+ opCtx.get(), MONGO_LOG_DEFAULT_COMPONENT, slowOpDurationMS, slowOpThresholdMS);
+
+ // Verify that shouldLogSlowOp is true when the op is slow.
+ ASSERT_TRUE(shouldLogSlowOp);
+
+ // Verify that shouldSample should be true when the sampleRate is 1.
+ ASSERT_TRUE(shouldSample);
+
+ // Set sample rate to never profile a slow operation.
+ serverGlobalParams.sampleRate = 0;
+
+ std::tie(shouldLogSlowOp, shouldSample) = shouldLogSlowOpWithSampling(
+ opCtx.get(), MONGO_LOG_DEFAULT_COMPONENT, slowOpDurationMS, slowOpThresholdMS);
+
+ // Verify that we should still log even when the sampleRate is 0.
+ ASSERT_TRUE(shouldLogSlowOp);
+
+ // Verify that shouldSample should be false even if shouldLogSlowOp is true.
+ ASSERT_FALSE(shouldSample);
+}
+
+TEST(LogWithSamplingTest, ShouldNotLogFastOp) {
+ const auto serviceContext = ServiceContext::make();
+ const auto client = serviceContext->makeClient("log_with_sampling_test");
+ const auto opCtx = client->makeOperationContext();
+
+ const auto slowOpThresholdMS = Milliseconds(10);
+
+ const auto originalSampleRate = serverGlobalParams.sampleRate;
+ // Set sample rate to always profile a slow operation.
+ serverGlobalParams.sampleRate = 1;
+
+ // Reset sample rate to the original value after the test exits.
+ ON_BLOCK_EXIT([originalSampleRate] { serverGlobalParams.sampleRate = originalSampleRate; });
+
+ // Set the op duration to be less than than slowOpThreshold so that the op is considered fast.
+ const auto fastOpDurationMS = Milliseconds(9);
+ // Set verbosity level of operation component to info so that it doesn't log due to the log
+ // level.
+ setMinimumLoggedSeverity(MONGO_LOG_DEFAULT_COMPONENT, logger::LogSeverity::Info());
+
+ bool shouldLogSlowOp, shouldSample;
+ std::tie(shouldLogSlowOp, shouldSample) = shouldLogSlowOpWithSampling(
+ opCtx.get(), MONGO_LOG_DEFAULT_COMPONENT, fastOpDurationMS, slowOpThresholdMS);
+
+ // Verify that shouldLogSlowOp is false when the op is fast.
+ ASSERT_FALSE(shouldLogSlowOp);
+
+ // Verify that shouldSample is true when the sampleRate is 1.
+ ASSERT_TRUE(shouldSample);
+
+ // Set sample rate to never profile a slow operation.
+ serverGlobalParams.sampleRate = 0;
+
+ std::tie(shouldLogSlowOp, shouldSample) = shouldLogSlowOpWithSampling(
+ opCtx.get(), MONGO_LOG_DEFAULT_COMPONENT, fastOpDurationMS, slowOpThresholdMS);
+
+ // Verify that we should still not log when the sampleRate is 0.
+ ASSERT_FALSE(shouldLogSlowOp);
+
+ // Verify that shouldSample is false when the sampleRate is 0.
+ ASSERT_FALSE(shouldSample);
+}
+
+} // namespace
+} // namespace mongo