diff options
author | Dianna Hohensee <dianna.hohensee@10gen.com> | 2018-03-26 17:26:50 -0400 |
---|---|---|
committer | Dianna Hohensee <dianna.hohensee@10gen.com> | 2018-04-13 10:58:19 -0400 |
commit | 4f0c2f4047bdafe7a5d952a9671bf436a763c4d5 (patch) | |
tree | e202c0b4f8598f669df77efa1a44ee94a2ec0dc3 | |
parent | 10bcc73a75ac857c290c3af6a3f89a45c4867043 (diff) | |
download | mongo-4f0c2f4047bdafe7a5d952a9671bf436a763c4d5.tar.gz |
SERVER-33295 Periodic task to check for and kill expired transactions.
(Adding a new int server parameter 'transactionLifetimeLimitSeconds'.)
-rw-r--r-- | buildscripts/resmokelib/core/programs.py | 8 | ||||
-rw-r--r-- | jstests/core/txns/abort_expired_transaction.js | 88 | ||||
-rw-r--r-- | jstests/noPassthrough/transactionLifetimeLimitSeconds_setParameter.js | 57 | ||||
-rw-r--r-- | src/mongo/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/SConscript | 11 | ||||
-rw-r--r-- | src/mongo/db/db.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/kill_sessions_local.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/kill_sessions_local.h | 8 | ||||
-rw-r--r-- | src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp | 97 | ||||
-rw-r--r-- | src/mongo/db/periodic_runner_job_abort_expired_transactions.h | 45 | ||||
-rw-r--r-- | src/mongo/db/session.cpp | 54 | ||||
-rw-r--r-- | src/mongo/db/session.h | 18 |
12 files changed, 402 insertions, 4 deletions
diff --git a/buildscripts/resmokelib/core/programs.py b/buildscripts/resmokelib/core/programs.py index db7a98a52ce..1b5ccd884aa 100644 --- a/buildscripts/resmokelib/core/programs.py +++ b/buildscripts/resmokelib/core/programs.py @@ -48,6 +48,14 @@ def mongod_program( # pylint: disable=too-many-branches if "disableLogicalSessionCacheRefresh" not in suite_set_parameters: suite_set_parameters["disableLogicalSessionCacheRefresh"] = True + # There's a periodic background thread that checks for and aborts expired transactions. + # "transactionLifetimeLimitSeconds" specifies for how long a transaction can run before expiring + # and being aborted by the background thread. It defaults to 60 seconds, which is too short to + # be reliable for our tests. Setting it to 3 hours, so that it is longer than the 2 hours we + # allow JS tests to run before timing them out. + if "transactionLifetimeLimitSeconds" not in suite_set_parameters: + suite_set_parameters["transactionLifetimeLimitSeconds"] = 3 * 60 * 60; + # The periodic no-op writer writes an oplog entry of type='n' once every 10 seconds. This has # the potential to mask issues such as SERVER-31609 because it allows the operationTime of # cluster to advance even if the client is blocked for other reasons. We should disable the diff --git a/jstests/core/txns/abort_expired_transaction.js b/jstests/core/txns/abort_expired_transaction.js new file mode 100644 index 00000000000..bcfe280834f --- /dev/null +++ b/jstests/core/txns/abort_expired_transaction.js @@ -0,0 +1,88 @@ +// Tests that a transaction expires and is then aborted by the server. Uses the server parameter +// 'transactionLifetimeLimitSeconds' to lower the transaction lifetime for quicker transaction +// expiration. +// +// @tags: [uses_transactions] + +(function() { + "use strict"; + + const testDBName = "testDB"; + const testCollName = "abort_expired_transaction"; + const ns = testDBName + "." 
+ testCollName; + const testDB = db.getSiblingDB(testDBName); + const testColl = testDB[testCollName]; + testColl.drop(); + + // Need the original 'transactionLifetimeLimitSeconds' value so that we can reset it back at the + // end of the test. + const res = assert.commandWorked( + db.adminCommand({getParameter: 1, transactionLifetimeLimitSeconds: 1})); + const originalTransactionLifetimeLimitSeconds = res.transactionLifetimeLimitSeconds; + + try { + jsTest.log("Decrease transactionLifetimeLimitSeconds from " + + originalTransactionLifetimeLimitSeconds + " to 1 second."); + assert.commandWorked( + db.adminCommand({setParameter: 1, transactionLifetimeLimitSeconds: 1})); + + jsTest.log("Create a collection '" + ns + "' outside of the transaction."); + assert.writeOK(testColl.insert({foo: "bar"})); + + jsTest.log("Set up the session."); + const sessionOptions = {causalConsistency: false}; + const session = db.getMongo().startSession(sessionOptions); + const sessionDb = session.getDatabase(testDBName); + + let txnNumber = 0; + + jsTest.log("Insert a document starting a transaction."); + assert.commandWorked(sessionDb.runCommand({ + insert: testCollName, + documents: [{_id: "insert-1"}], + txnNumber: NumberLong(txnNumber), + startTransaction: true, + autocommit: false, + })); + + // We can deterministically wait for the transaction to be aborted by waiting for currentOp + // to cease reporting the inactive transaction: the transaction should disappear from the + // currentOp results once aborted. 
+ jsTest.log("Wait for the transaction to expire and be aborted."); + assert.soon( + function() { + const sessionFilter = { + active: false, + opid: {$exists: false}, + desc: "inactive transaction", "lsid.id": session.getSessionId().id, + txnNumber: NumberLong(txnNumber), + }; + const res = db.getSiblingDB("admin").aggregate( + [{$currentOp: {allUsers: true, idleSessions: true}}, {$match: sessionFilter}]); + return (res.itcount() == 0); + + }, + "currentOp reports that the idle transaction still exists, it has not been " + + "aborted as expected."); + + jsTest.log( + "Attempt to do a write in the transaction, which should fail because the transaction " + + "was aborted"); + assert.commandFailedWithCode(sessionDb.runCommand({ + insert: testCollName, + documents: [{_id: "insert-2"}], + txnNumber: NumberLong(txnNumber), + autocommit: false, + }), + ErrorCodes.NoSuchTransaction); + + session.endSession(); + } finally { + // Must ensure that the transactionLifetimeLimitSeconds is reset so that it does not impact + // other tests in the suite. + assert.commandWorked(db.adminCommand({ + setParameter: 1, + transactionLifetimeLimitSeconds: originalTransactionLifetimeLimitSeconds + })); + } +}()); diff --git a/jstests/noPassthrough/transactionLifetimeLimitSeconds_setParameter.js b/jstests/noPassthrough/transactionLifetimeLimitSeconds_setParameter.js new file mode 100644 index 00000000000..a53fda9ea90 --- /dev/null +++ b/jstests/noPassthrough/transactionLifetimeLimitSeconds_setParameter.js @@ -0,0 +1,57 @@ +// Test server validation of the 'transactionLifetimeLimitSeconds' server parameter setting on +// startup and via setParameter command. + +(function() { + 'use strict'; + + /** + * Takes a server connection 'conn' and server parameter 'field' and calls getParameter on the + * connection to retrieve the current setting of that server parameter. 
+ */ + function getParameter(conn, field) { + var q = {getParameter: 1}; + q[field] = 1; + + var ret = conn.getDB("admin").runCommand(q); + return ret[field]; + } + + /** + * Calls setParameter on 'conn' server connection, setting server parameter 'field' to 'value'. + */ + function setParameter(conn, field, value) { + var cmd = {setParameter: 1}; + cmd[field] = value; + return conn.adminCommand(cmd); + } + + // Check that 'transactionLifetimeLimitSeconds' defaults to 60s on startup. + let conn1 = MongoRunner.runMongod({}); + assert.eq(getParameter(conn1, "transactionLifetimeLimitSeconds"), 60); + + // Check that 'transactionLifetimeLimitSeconds' can be set via setParameter. + assert.commandWorked(setParameter(conn1, "transactionLifetimeLimitSeconds", 30)); + assert.eq(getParameter(conn1, "transactionLifetimeLimitSeconds"), 30); + + // Check that setParameter on 'transactionLifetimeLimitSeconds' does validation checking: + // setting 'transactionLifetimeLimitSeconds' below 1s should not be allowed. + assert.commandFailedWithCode(setParameter(conn1, "transactionLifetimeLimitSeconds", -15), + ErrorCodes.BadValue); + assert.eq(getParameter(conn1, "transactionLifetimeLimitSeconds"), 30); + + MongoRunner.stopMongod(conn1); + + // Check that 'transactionLifetimeLimitSeconds' can be set on startup. + let conn2 = MongoRunner.runMongod({setParameter: "transactionLifetimeLimitSeconds=50"}); + assert.eq(getParameter(conn2, "transactionLifetimeLimitSeconds"), 50); + MongoRunner.stopMongod(conn2); + + // Check that 'transactionLifetimeLimitSeconds' cannot be set below 1s on startup. 
+ let conn3 = MongoRunner.runMongod({setParameter: "transactionLifetimeLimitSeconds=0"}); + assert.eq( + null, + conn3, + "expected mongod to fail to startup with an invalid 'transactionLifetimeLimitSeconds'" + + " server parameter setting of 0s."); + +})(); diff --git a/src/mongo/SConscript b/src/mongo/SConscript index 45135329615..3ff223778d2 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -304,6 +304,7 @@ env.Library( 'db/mongod_options', 'db/mongodandmongos', 'db/op_observer_d', + 'db/periodic_runner_job_abort_expired_transactions', 'db/query_exec', 'db/repair_database', 'db/repair_database_and_check_version', diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index a2c4a480407..12b1cc9830d 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -1447,6 +1447,17 @@ env.Library( ) env.Library( + target='periodic_runner_job_abort_expired_transactions', + source=[ + 'periodic_runner_job_abort_expired_transactions.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/util/periodic_runner', + 'kill_sessions_local', + ], +) + +env.Library( target='signed_logical_time', source=[ 'signed_logical_time.cpp', diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp index 52e368abb7a..c23c493622e 100644 --- a/src/mongo/db/db.cpp +++ b/src/mongo/db/db.cpp @@ -94,6 +94,7 @@ #include "mongo/db/op_observer_impl.h" #include "mongo/db/op_observer_registry.h" #include "mongo/db/operation_context.h" +#include "mongo/db/periodic_runner_job_abort_expired_transactions.h" #include "mongo/db/query/internal_plans.h" #include "mongo/db/repair_database_and_check_version.h" #include "mongo/db/repl/drop_pending_collection_reaper.h" @@ -580,6 +581,15 @@ ExitCode _initAndListen(int listenPort) { SessionKiller::set(serviceContext, std::make_shared<SessionKiller>(serviceContext, killSessionsLocal)); + // Start up a background task to periodically check for and kill expired transactions. 
+ // Only do this on storage engines supporting snapshot reads, which hold resources we wish to + // release periodically in order to avoid storage cache pressure build up. + auto storageEngine = serviceContext->getGlobalStorageEngine(); + invariant(storageEngine); + if (storageEngine->supportsReadConcernSnapshot()) { + startPeriodicThreadToAbortExpiredTransactions(serviceContext); + } + // Set up the logical session cache LogicalSessionCacheServer kind = LogicalSessionCacheServer::kStandalone; if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) { diff --git a/src/mongo/db/kill_sessions_local.cpp b/src/mongo/db/kill_sessions_local.cpp index 9acfd6c6aed..1eec2904850 100644 --- a/src/mongo/db/kill_sessions_local.cpp +++ b/src/mongo/db/kill_sessions_local.cpp @@ -67,4 +67,13 @@ SessionKiller::Result killSessionsLocal(OperationContext* opCtx, return {std::vector<HostAndPort>{}}; } +void killAllExpiredTransactions(OperationContext* opCtx) { + SessionKiller::Matcher matcherAllSessions( + KillAllSessionsByPatternSet{makeKillAllSessionsByPattern(opCtx)}); + SessionCatalog::get(opCtx)->scanSessions( + opCtx, matcherAllSessions, [](OperationContext* opCtx, Session* session) { + session->abortArbitraryTransactionIfExpired(); + }); +} + } // namespace mongo diff --git a/src/mongo/db/kill_sessions_local.h b/src/mongo/db/kill_sessions_local.h index c9d813f4915..a21bcec4ae5 100644 --- a/src/mongo/db/kill_sessions_local.h +++ b/src/mongo/db/kill_sessions_local.h @@ -30,6 +30,9 @@ #include "mongo/db/session_killer.h" +/** + * Mongod local kill session / transaction functionality library. + */ namespace mongo { /** @@ -45,4 +48,9 @@ SessionKiller::Result killSessionsLocal(OperationContext* opCtx, void killSessionsLocalKillTransactions(OperationContext* opCtx, const SessionKiller::Matcher& matcher); +/** + * Aborts any expired transactions. 
+ */ +void killAllExpiredTransactions(OperationContext* opCtx); + } // namespace mongo diff --git a/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp b/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp new file mode 100644 index 00000000000..d6b394f6db1 --- /dev/null +++ b/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp @@ -0,0 +1,97 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage + +#include "mongo/platform/basic.h" + +#include "mongo/db/periodic_runner_job_abort_expired_transactions.h" + +#include "mongo/db/client.h" +#include "mongo/db/kill_sessions_local.h" +#include "mongo/db/service_context.h" +#include "mongo/db/session.h" +#include "mongo/util/log.h" +#include "mongo/util/periodic_runner.h" + +namespace mongo { + +void startPeriodicThreadToAbortExpiredTransactions(ServiceContext* serviceContext) { + // Enforce calling this function once, and only once. + static bool firstCall = true; + invariant(firstCall); + firstCall = false; + + auto periodicRunner = serviceContext->getPeriodicRunner(); + invariant(periodicRunner); + + // We want this job period to be dynamic, to run every (transactionLifetimeLimitSeconds/2) + // seconds, where transactionLifetimeLimitSeconds is an adjustable server parameter, or within + // the 1 second to 1 minute range. + // + // PeriodicRunner does not currently support altering the period of a job. So we are giving this + // job a 1 second period on PeriodicRunner and incrementing a static variable 'seconds' on each + // run until we reach transactionLifetimeLimitSeconds/2, at which point we run the code and + // reset 'seconds'. Etc. + PeriodicRunner::PeriodicJob job( + [](Client* client) { + try { + static int seconds = 0; + int lifetime = transactionLifetimeLimitSeconds.load(); + + invariant(lifetime >= 1); + int period = lifetime / 2; + + // Ensure: 1 <= period <= 60 seconds + period = (period < 1) ? 1 : period; + period = (period > 60) ? 60 : period; + + if (++seconds < period) { + return; + } + + seconds = 0; + + // The opCtx destructor handles unsetting itself from the Client. + // (The PeriodicRunnerASIO's Client must be reset before returning.) 
+ auto opCtx = client->makeOperationContext(); + + killAllExpiredTransactions(opCtx.get()); + } catch (const DBException& ex) { + if (!ErrorCodes::isShutdownError(ex.toStatus().code())) { + warning() << "Periodic task to abort expired transactions failed! Caused by: " + << ex.toStatus(); + } + } + }, + Seconds(1)); + + periodicRunner->scheduleJob(std::move(job)); +} + +} // namespace mongo diff --git a/src/mongo/db/periodic_runner_job_abort_expired_transactions.h b/src/mongo/db/periodic_runner_job_abort_expired_transactions.h new file mode 100644 index 00000000000..28ab86d1b64 --- /dev/null +++ b/src/mongo/db/periodic_runner_job_abort_expired_transactions.h @@ -0,0 +1,45 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. 
If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#pragma once + +namespace mongo { + +class ServiceContext; + +/** + * Defines and starts a periodic background job to check for and abort expired transactions. + * The job will run every (transactionLifetimeLimitSeconds/2) seconds, or at most once per second + * and at least once per minute. + * + * This function should only ever be called once, during mongod server startup (db.cpp). + * The PeriodicRunner will handle shutting down the job on shutdown, no extra handling necessary. + */ +void startPeriodicThreadToAbortExpiredTransactions(ServiceContext* serviceContext); + +} // namespace mongo diff --git a/src/mongo/db/session.cpp b/src/mongo/db/session.cpp index bbf7c404093..6972f5ceb3f 100644 --- a/src/mongo/db/session.cpp +++ b/src/mongo/db/session.cpp @@ -46,6 +46,7 @@ #include "mongo/db/repl/read_concern_args.h" #include "mongo/db/retryable_writes_stats.h" #include "mongo/db/server_options.h" +#include "mongo/db/server_parameters.h" #include "mongo/db/stats/fill_locker_info.h" #include "mongo/db/transaction_history_iterator.h" #include "mongo/stdx/memory.h" @@ -56,6 +57,36 @@ #include "mongo/util/net/sock.h" namespace mongo { + +// Server parameter that dictates the lifetime given to each transaction. +// Transactions must eventually expire to preempt storage cache pressure immobilizing the system. +server_parameter_storage_type<int, ServerParameterType::kStartupAndRuntime>::value_type + transactionLifetimeLimitSeconds(60); + +/** + * Implements a validation function for server parameter 'transactionLifetimeLimitSeconds' + * instantiated above. 'transactionLifetimeLimitSeconds' can only be set to >= 1. 
+ */ +class ExportedTransactionLifetimeLimitSeconds + : public ExportedServerParameter<std::int32_t, ServerParameterType::kStartupAndRuntime> { +public: + ExportedTransactionLifetimeLimitSeconds() + : ExportedServerParameter<std::int32_t, ServerParameterType::kStartupAndRuntime>( + ServerParameterSet::getGlobal(), + "transactionLifetimeLimitSeconds", + &transactionLifetimeLimitSeconds) {} + + Status validate(const std::int32_t& potentialNewValue) override { + if (potentialNewValue < 1) { + return Status(ErrorCodes::BadValue, + "transactionLifetimeLimitSeconds must be greater than or equal to 1s"); + } + + return Status::OK(); + } + +} exportedTransactionLifetimeLimitSeconds; + namespace { void fassertOnRepeatedExecution(const LogicalSessionId& lsid, @@ -520,8 +551,10 @@ void Session::_beginOrContinueTxn(WithLock wl, ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo40)); _setActiveTxn(wl, txnNumber); - _txnState = MultiDocumentTransactionState::kInProgress; _autocommit = false; + _txnState = MultiDocumentTransactionState::kInProgress; + _transactionExpireDate = + Date_t::now() + stdx::chrono::seconds{transactionLifetimeLimitSeconds.load()}; } else { // Execute a retryable write or snapshot read. 
invariant(startTransaction == boost::none); @@ -708,10 +741,23 @@ void Session::unstashTransactionResources(OperationContext* opCtx, const std::st void Session::abortArbitraryTransaction() { stdx::lock_guard<stdx::mutex> lock(_mutex); - if (_txnState == MultiDocumentTransactionState::kInProgress || - _txnState == MultiDocumentTransactionState::kInSnapshotRead) { - _abortTransaction(lock); + _abortArbitraryTransaction(lock); +} + +void Session::abortArbitraryTransactionIfExpired() { + stdx::lock_guard<stdx::mutex> lock(_mutex); + if (!_transactionExpireDate || _transactionExpireDate >= Date_t::now()) { + return; + } + _abortArbitraryTransaction(lock); +} + +void Session::_abortArbitraryTransaction(WithLock lock) { + if (_txnState != MultiDocumentTransactionState::kInProgress && + _txnState != MultiDocumentTransactionState::kInSnapshotRead) { + return; } + _abortTransaction(lock); } void Session::abortActiveTransaction(OperationContext* opCtx) { diff --git a/src/mongo/db/session.h b/src/mongo/db/session.h index 568a91a055f..5404ff4bc77 100644 --- a/src/mongo/db/session.h +++ b/src/mongo/db/session.h @@ -39,11 +39,14 @@ #include "mongo/db/repl/read_concern_args.h" #include "mongo/db/session_txn_record_gen.h" #include "mongo/db/storage/recovery_unit.h" +#include "mongo/platform/atomic_word.h" #include "mongo/stdx/unordered_map.h" #include "mongo/util/concurrency/with_lock.h" namespace mongo { +extern AtomicInt32 transactionLifetimeLimitSeconds; + class OperationContext; class UpdateRequest; @@ -246,6 +249,12 @@ public: */ void abortArbitraryTransaction(); + /** + * Same as abortArbitraryTransaction, except only executes if _transactionExpireDate indicates + * that the transaction has expired. + */ + void abortArbitraryTransactionIfExpired(); + /* * Aborts the transaction inside the transaction, releasing transaction resources. 
* We're inside the transaction when we have the Session checked out and 'opCtx' owns the @@ -370,6 +379,8 @@ private: std::vector<StmtId> stmtIdsWritten, const repl::OpTime& lastStmtIdWriteTs); + void _abortArbitraryTransaction(WithLock); + // Releases stashed transaction resources to abort the transaction. void _abortTransaction(WithLock); @@ -439,6 +450,13 @@ private: // Set in _beginOrContinueTxn and applies to the activeTxn on the session. bool _autocommit{true}; + + // Set when a snapshot read / transaction begins. Alleviates cache pressure by limiting how long + // a snapshot will remain open and available. Checked in combination with _txnState to determine + // whether the transaction should be aborted. + // This is unset until a transaction begins on the session, and then reset only when new + // transactions begin. + boost::optional<Date_t> _transactionExpireDate; }; } // namespace mongo |