From e75f7fa2231cbd78d82923393de84d72702f9bcf Mon Sep 17 00:00:00 2001 From: Nathan Louie Date: Wed, 1 Aug 2018 15:47:57 -0400 Subject: SERVER-35159 Track transactions latency and add to serverStatus output --- .../global_transaction_latency_histogram.js | 120 +++++++++++++++++++++ src/mongo/db/SConscript | 1 + src/mongo/db/stats/top.cpp | 5 + src/mongo/db/stats/top.h | 7 +- src/mongo/db/transaction_participant.cpp | 19 ++-- 5 files changed, 145 insertions(+), 7 deletions(-) create mode 100644 jstests/noPassthrough/global_transaction_latency_histogram.js diff --git a/jstests/noPassthrough/global_transaction_latency_histogram.js b/jstests/noPassthrough/global_transaction_latency_histogram.js new file mode 100644 index 00000000000..561eafeb6d4 --- /dev/null +++ b/jstests/noPassthrough/global_transaction_latency_histogram.js @@ -0,0 +1,120 @@ +// Checks that the global histogram counter for transactions are updated as we expect. +// @tags: [requires_replication, uses_transactions] +(function() { + "use strict"; + + // Set up the replica set. + const rst = new ReplSetTest({nodes: 1}); + rst.startSet(); + rst.initiate(); + const primary = rst.getPrimary(); + + // Set up the test database. + const dbName = "test"; + const collName = "global_transaction_latency_histogram"; + + const testDB = primary.getDB(dbName); + testDB.runCommand({drop: collName, writeConcern: {w: "majority"}}); + assert.commandWorked(testDB.runCommand({create: collName, writeConcern: {w: "majority"}})); + + // Start the session. + const sessionOptions = {causalConsistency: false}; + const session = testDB.getMongo().startSession(sessionOptions); + const sessionDb = session.getDatabase(dbName); + const sessionColl = sessionDb[collName]; + + function getHistogramStats() { + return testDB.serverStatus({opLatencies: {histograms: 1}}).opLatencies; + } + + // Checks that the actual value is within a minimum on the bound of the expected value. All + // arguments must be in the same units. + function assertLowerBound(expected, actual, bound) { + assert.gte(actual, expected - bound); + } + + // This function checks the diff between the last histogram and the current histogram, not the + // absolute values. + function checkHistogramDiff(lastHistogram, thisHistogram, fields) { + for (let key in fields) { + if (fields.hasOwnProperty(key)) { + assert.eq(thisHistogram[key].ops - lastHistogram[key].ops, fields[key]); + } + } + return thisHistogram; + } + + // This function checks the diff between the last histogram's accumulated transactions latency + // and this histogram's accumulated transactions latency is within a reasonable bound of what + // we expect. + function checkHistogramLatencyDiff(lastHistogram, thisHistogram, sleepTime) { + let latencyDiff = thisHistogram.transactions.latency - lastHistogram.transactions.latency; + // Check the bound in microseconds, which is the unit the latency is in. We do not check + // upper bound because of unknown extra server latency. + assertLowerBound(sleepTime * 1000, latencyDiff, 50000); + return thisHistogram; + } + + let lastHistogram = getHistogramStats(); + + // Verify the base stats are correct. + lastHistogram = checkHistogramDiff(lastHistogram, + getHistogramStats(), + {"reads": 0, "writes": 0, "commands": 1, "transactions": 0}); + + // Test histogram increments on a successful transaction. "commitTransaction" and "serverStatus" + // commands are counted towards the "commands" counter. + session.startTransaction(); + assert.commandWorked(sessionColl.insert({_id: "insert-1"})); + session.commitTransaction(); + lastHistogram = checkHistogramDiff(lastHistogram, + getHistogramStats(), + {"reads": 0, "writes": 1, "commands": 2, "transactions": 1}); + + // Test histogram increments on aborted transaction due to error (duplicate insert). + session.startTransaction(); + assert.commandFailedWithCode(sessionColl.insert({_id: "insert-1"}), ErrorCodes.DuplicateKey); + lastHistogram = checkHistogramDiff(lastHistogram, + getHistogramStats(), + {"reads": 0, "writes": 1, "commands": 1, "transactions": 1}); + + // Ensure that the transaction was aborted on failure. + assert.commandFailedWithCode(session.commitTransaction_forTesting(), + ErrorCodes.NoSuchTransaction); + lastHistogram = checkHistogramDiff(lastHistogram, + getHistogramStats(), + {"reads": 0, "writes": 0, "commands": 2, "transactions": 0}); + + // Test histogram increments on an aborted transaction. "abortTransaction" command is counted + // towards the "commands" counter. + session.startTransaction(); + assert.commandWorked(sessionColl.insert({_id: "insert-2"})); + session.abortTransaction(); + lastHistogram = checkHistogramDiff(lastHistogram, + getHistogramStats(), + {"reads": 0, "writes": 1, "commands": 2, "transactions": 1}); + + // Test histogram increments on a multi-statement committed transaction. + session.startTransaction(); + assert.commandWorked(sessionColl.insert({_id: "insert-3"})); + assert.commandWorked(sessionColl.insert({_id: "insert-4"})); + assert.eq(sessionColl.find({_id: "insert-1"}).itcount(), 1); + session.commitTransaction(); + lastHistogram = checkHistogramDiff(lastHistogram, + getHistogramStats(), + {"reads": 1, "writes": 2, "commands": 2, "transactions": 1}); + + // Test that the cumulative transaction latency counter is updated appropriately after a + // sequence of back-to-back 200 ms transactions. + const sleepTime = 200; + for (let i = 0; i < 3; i++) { + session.startTransaction(); + assert.eq(sessionColl.find({_id: "insert-1"}).itcount(), 1); + sleep(sleepTime); + session.commitTransaction(); + lastHistogram = checkHistogramLatencyDiff(lastHistogram, getHistogramStats(), sleepTime); + } + + session.endSession(); + rst.stopSet(); +}()); \ No newline at end of file diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 75360bf6fff..1fbc546cac3 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -648,6 +648,7 @@ env.Library( 'repl/oplog_entry', 'repl/oplog_shim', 's/sharding_api_d', + "stats/top", 'views/views', '$BUILD_DIR/mongo/db/commands/test_commands_enabled', ], diff --git a/src/mongo/db/stats/top.cpp b/src/mongo/db/stats/top.cpp index 155f122fef5..5f6b5c2e7a0 100644 --- a/src/mongo/db/stats/top.cpp +++ b/src/mongo/db/stats/top.cpp @@ -220,6 +220,11 @@ void Top::appendGlobalLatencyStats(bool includeHistograms, BSONObjBuilder* build _globalHistogramStats.append(includeHistograms, builder); } +void Top::incrementGlobalTransactionLatencyStats(uint64_t latency) { + stdx::lock_guard guard(_lock); + _globalHistogramStats.increment(latency, Command::ReadWriteType::kTransaction); +} + void Top::_incrementHistogram(OperationContext* opCtx, long long latency, OperationLatencyHistogram* histogram, diff --git a/src/mongo/db/stats/top.h b/src/mongo/db/stats/top.h index 6dc2cf7f8a3..edcc72778b7 100644 --- a/src/mongo/db/stats/top.h +++ b/src/mongo/db/stats/top.h @@ -112,12 +112,17 @@ public: void appendLatencyStats(StringData ns, bool includeHistograms, BSONObjBuilder* builder); /** - * Increments the global histogram. + * Increments the global histogram only if the operation came from a user. */ void incrementGlobalLatencyStats(OperationContext* opCtx, uint64_t latency, Command::ReadWriteType readWriteType); + /** + * Increments the global transactions histogram. + */ + void incrementGlobalTransactionLatencyStats(uint64_t latency); + /** * Appends the global latency statistics. */ diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index 35238c34909..e0f90cd1d14 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -43,6 +43,7 @@ #include "mongo/db/session.h" #include "mongo/db/session_catalog.h" #include "mongo/db/stats/fill_locker_info.h" +#include "mongo/db/stats/top.h" #include "mongo/util/fail_point_service.h" #include "mongo/util/log.h" #include "mongo/util/net/socket_utils.h" @@ -693,17 +694,19 @@ void TransactionParticipant::_commitTransaction(stdx::unique_lock l _txnState.transitionTo(lk, TransactionState::kCommitted); - ServerTransactionsMetrics::get(opCtx)->incrementTotalCommitted(); - // After the transaction has been committed, we must update the end time and mark it as // inactive. - _singleTransactionStats.setEndTime(curTimeMicros64()); + const auto now = curTimeMicros64(); + _singleTransactionStats.setEndTime(now); if (_singleTransactionStats.isActive()) { - _singleTransactionStats.setInactive(curTimeMicros64()); + _singleTransactionStats.setInactive(now); } + ServerTransactionsMetrics::get(opCtx)->incrementTotalCommitted(); ServerTransactionsMetrics::get(opCtx)->decrementCurrentOpen(); ServerTransactionsMetrics::get(getGlobalServiceContext())->decrementCurrentActive(); + Top::get(getGlobalServiceContext()) + .incrementGlobalTransactionLatencyStats(_singleTransactionStats.getDuration(now)); // Add the latest operation stats to the aggregate OpDebug object stored in the // SingleTransactionStats instance on the Session. @@ -790,11 +793,12 @@ void TransactionParticipant::_abortActiveTransaction(OperationContext* opCtx, } void TransactionParticipant::_abortTransactionOnSession(WithLock wl) { + const auto now = curTimeMicros64(); if (!_txnState.isNone(wl)) { - _singleTransactionStats.setEndTime(curTimeMicros64()); + _singleTransactionStats.setEndTime(now); // The transaction has aborted, so we mark it as inactive. if (_singleTransactionStats.isActive()) { - _singleTransactionStats.setInactive(curTimeMicros64()); + _singleTransactionStats.setInactive(now); } } @@ -822,6 +826,9 @@ void TransactionParticipant::_abortTransactionOnSession(WithLock wl) { ServerTransactionsMetrics::get(getGlobalServiceContext())->incrementTotalAborted(); ServerTransactionsMetrics::get(getGlobalServiceContext())->decrementCurrentOpen(); + + Top::get(getGlobalServiceContext()) + .incrementGlobalTransactionLatencyStats(_singleTransactionStats.getDuration(now)); } void TransactionParticipant::_cleanUpTxnResourceOnOpCtx(WithLock wl, OperationContext* opCtx) { -- cgit v1.2.1