From 136178a12e6b4fe41b6be047ee04db60a69af33e Mon Sep 17 00:00:00 2001 From: Dianna Hohensee Date: Thu, 12 Apr 2018 16:44:36 -0400 Subject: SERVER-31767 Provide a window of snapshot history that is accessible for PIT reads --- src/mongo/db/SConscript | 69 ++++++++- src/mongo/db/db.cpp | 7 +- ...iodic_runner_job_abort_expired_transactions.cpp | 2 +- ...runner_job_decrease_snapshot_cache_pressure.cpp | 92 ++++++++++++ ...c_runner_job_decrease_snapshot_cache_pressure.h | 45 ++++++ src/mongo/db/repl/storage_interface.h | 2 +- src/mongo/db/service_context_d_test_fixture.h | 2 +- .../db/service_context_devnull_test_fixture.cpp | 70 +++++++++ .../db/service_context_devnull_test_fixture.h | 57 +++++++ src/mongo/db/service_entry_point_common.cpp | 10 ++ src/mongo/db/snapshot_window_options.cpp | 167 +++++++++++++++++++++ src/mongo/db/snapshot_window_options.h | 108 +++++++++++++ src/mongo/db/snapshot_window_util.cpp | 147 ++++++++++++++++++ src/mongo/db/snapshot_window_util.h | 77 ++++++++++ src/mongo/db/snapshot_window_util_test.cpp | 156 +++++++++++++++++++ src/mongo/db/storage/devnull/SConscript | 3 + src/mongo/db/storage/devnull/devnull_kv_engine.cpp | 11 ++ src/mongo/db/storage/devnull/devnull_kv_engine.h | 9 ++ src/mongo/db/storage/kv/kv_engine.h | 19 ++- src/mongo/db/storage/kv/kv_engine_test_harness.h | 8 + src/mongo/db/storage/kv/kv_storage_engine.cpp | 16 +- src/mongo/db/storage/kv/kv_storage_engine.h | 8 +- src/mongo/db/storage/storage_engine.h | 31 +++- src/mongo/db/storage/test_harness_helper.h | 11 +- src/mongo/db/storage/wiredtiger/SConscript | 1 + .../db/storage/wiredtiger/wiredtiger_kv_engine.cpp | 114 +++++++++++--- .../db/storage/wiredtiger/wiredtiger_kv_engine.h | 44 +++++- .../wiredtiger/wiredtiger_recovery_unit_test.cpp | 52 ++++++- .../wiredtiger/wiredtiger_server_status.cpp | 3 +- .../db/storage/wiredtiger/wiredtiger_util.cpp | 34 +++++ src/mongo/db/storage/wiredtiger/wiredtiger_util.h | 20 +++ 31 files changed, 1354 insertions(+), 41 deletions(-) create mode 100644 src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.cpp create mode 100644 src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.h create mode 100644 src/mongo/db/service_context_devnull_test_fixture.cpp create mode 100644 src/mongo/db/service_context_devnull_test_fixture.h create mode 100644 src/mongo/db/snapshot_window_options.cpp create mode 100644 src/mongo/db/snapshot_window_options.h create mode 100644 src/mongo/db/snapshot_window_util.cpp create mode 100644 src/mongo/db/snapshot_window_util.h create mode 100644 src/mongo/db/snapshot_window_util_test.cpp (limited to 'src/mongo/db') diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 59d12eb3f23..dbf3eb40306 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -763,6 +763,7 @@ env.Library( '$BUILD_DIR/mongo/base', ], LIBDEPS_PRIVATE=[ + 'snapshot_window_util', '$BUILD_DIR/mongo/db/auth/auth', '$BUILD_DIR/mongo/db/auth/authprivilege', '$BUILD_DIR/mongo/db/command_can_run_here', @@ -1464,9 +1465,56 @@ env.Library( source=[ 'periodic_runner_job_abort_expired_transactions.cpp', ], - LIBDEPS=[ - '$BUILD_DIR/mongo/util/periodic_runner', + LIBDEPS_PRIVATE=[ 'kill_sessions_local', + '$BUILD_DIR/mongo/util/periodic_runner', + ], +) + +env.Library( + target='periodic_runner_job_decrease_snapshot_cache_pressure', + source=[ + 'periodic_runner_job_decrease_snapshot_cache_pressure.cpp', + ], + LIBDEPS_PRIVATE=[ + 'snapshot_window_options', + 'snapshot_window_util', + '$BUILD_DIR/mongo/db/service_context', + '$BUILD_DIR/mongo/util/periodic_runner', + ], +) + +env.Library( + target='snapshot_window_options', + source=[ + 'snapshot_window_options.cpp', + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/server_parameters', + ], +) + +env.Library( + target='snapshot_window_util', + source=[ + 'snapshot_window_util.cpp', + ], + LIBDEPS_PRIVATE=[ + 'snapshot_window_options', + '$BUILD_DIR/mongo/db/service_context', + ], +) + +env.CppUnitTest( + target='snapshot_window_util_test', + source=[ + 'snapshot_window_util_test.cpp', + ], + LIBDEPS_PRIVATE=[ + 'service_context_d', + 'service_context_devnull_test_fixture', + 'snapshot_window_options', + 'snapshot_window_util', ], ) @@ -1694,6 +1742,23 @@ env.Library( ], ) +env.Library( + target= 'service_context_devnull_test_fixture', + source= [ + 'service_context_devnull_test_fixture.cpp', + ], + LIBDEPS=[ + # this library is required only because of SERVER-29908 + '$BUILD_DIR/mongo/db/s/sharding_runtime_d', + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/auth/authmocks', + '$BUILD_DIR/mongo/db/storage/devnull/storage_devnull', + '$BUILD_DIR/mongo/unittest/unittest', + 'service_context_d', + ], +) + env.Library( target='log_process_details', source=[ diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp index 5ddf80abdd9..6cb7c322667 100644 --- a/src/mongo/db/db.cpp +++ b/src/mongo/db/db.cpp @@ -95,6 +95,7 @@ #include "mongo/db/op_observer_registry.h" #include "mongo/db/operation_context.h" #include "mongo/db/periodic_runner_job_abort_expired_transactions.h" +#include "mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.h" #include "mongo/db/query/internal_plans.h" #include "mongo/db/repair_database_and_check_version.h" #include "mongo/db/repl/drop_pending_collection_reaper.h" @@ -601,13 +602,17 @@ ExitCode _initAndListen(int listenPort) { SessionKiller::set(serviceContext, std::make_shared(serviceContext, killSessionsLocal)); - // Start up a background task to periodically check for and kill expired transactions. + // Start up a background task to periodically check for and kill expired transactions; and a + // background task to periodically check for and decrease cache pressure by decreasing the + // target size setting for the storage engine's window of available snapshots. + // // Only do this on storage engines supporting snapshot reads, which hold resources we wish to // release periodically in order to avoid storage cache pressure build up. auto storageEngine = serviceContext->getStorageEngine(); invariant(storageEngine); if (storageEngine->supportsReadConcernSnapshot()) { startPeriodicThreadToAbortExpiredTransactions(serviceContext); + startPeriodicThreadToDecreaseSnapshotHistoryCachePressure(serviceContext); } // Set up the logical session cache diff --git a/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp b/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp index 84959e6fd3c..196a0f1dca9 100644 --- a/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp +++ b/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp @@ -70,7 +70,7 @@ void startPeriodicThreadToAbortExpiredTransactions(ServiceContext* serviceContex period = (period < 1) ? 1 : period; period = (period > 60) ? 60 : period; - if (++seconds < period) { + if (++seconds <= period) { return; } diff --git a/src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.cpp b/src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.cpp new file mode 100644 index 00000000000..351302a3d4c --- /dev/null +++ b/src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.cpp @@ -0,0 +1,92 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage + +#include "mongo/platform/basic.h" + +#include "mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.h" + +#include "mongo/db/client.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/service_context.h" +#include "mongo/db/snapshot_window_options.h" +#include "mongo/db/snapshot_window_util.h" +#include "mongo/util/log.h" +#include "mongo/util/periodic_runner.h" + +namespace mongo { + +void startPeriodicThreadToDecreaseSnapshotHistoryCachePressure(ServiceContext* serviceContext) { + // Enforce calling this function once, and only once. + static bool firstCall = true; + invariant(firstCall); + firstCall = false; + + auto periodicRunner = serviceContext->getPeriodicRunner(); + invariant(periodicRunner); + + // PeriodicRunner does not currently support altering the period of a job. So we are giving this + // job a 1 second period on PeriodicRunner and incrementing a static variable 'seconds' on each + // run until we reach checkCachePressurePeriodSeconds, at which point we run the code and reset + // 'seconds'. Etc. + PeriodicRunner::PeriodicJob job( + "startPeriodicThreadToDecreaseSnapshotHistoryCachePressure", + [](Client* client) { + try { + static int seconds = 0; + int checkPressurePeriod = + snapshotWindowParams.checkCachePressurePeriodSeconds.load(); + + invariant(checkPressurePeriod >= 1); + + if (++seconds <= checkPressurePeriod) { + return; + } + + seconds = 0; + + // The opCtx destructor handles unsetting itself from the Client. + // (The PeriodicRunnerASIO's Client must be reset before returning.) + auto opCtx = client->makeOperationContext(); + + SnapshotWindowUtil::decreaseTargetSnapshotWindowSize(opCtx.get()); + } catch (const DBException& ex) { + if (!ErrorCodes::isShutdownError(ex.toStatus().code())) { + warning() << "Periodic task to check for and decrease cache pressure caused by " + "maintaining too much snapshot history failed! Caused by: " + << ex.toStatus(); + } + } + }, + Seconds(1)); + + periodicRunner->scheduleJob(std::move(job)); +} + +} // namespace mongo diff --git a/src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.h b/src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.h new file mode 100644 index 00000000000..3ff0be0dae9 --- /dev/null +++ b/src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.h @@ -0,0 +1,45 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#pragma once + +namespace mongo { + +class ServiceContext; + +/** + * Periodically checks for storage engine cache pressure to determine whether the maintained + * snapshot history window target setting should be decreased. Maintaining too much snapshot and + * write history can slow down the system. Runs once every checkCachePressurePeriodSeconds. + * + * This function should only ever be called once, during mongod server startup (db.cpp). + * The PeriodicRunner will handle shutting down the job on shutdown, no extra handling necessary. + */ +void startPeriodicThreadToDecreaseSnapshotHistoryCachePressure(ServiceContext* serviceContext); + +} // namespace mongo diff --git a/src/mongo/db/repl/storage_interface.h b/src/mongo/db/repl/storage_interface.h index 5b526456752..ca3ce588fa7 100644 --- a/src/mongo/db/repl/storage_interface.h +++ b/src/mongo/db/repl/storage_interface.h @@ -59,7 +59,7 @@ struct TimestampedBSONObj { /** * Storage interface used by the replication system to interact with storage. - * This interface provides seperation of concerns and a place for mocking out test + * This interface provides separation of concerns and a place for mocking out test * interactions. * * The grouping of functionality includes general collection helpers, and more specific replication diff --git a/src/mongo/db/service_context_d_test_fixture.h b/src/mongo/db/service_context_d_test_fixture.h index 35932b64814..bd56f4f2ae6 100644 --- a/src/mongo/db/service_context_d_test_fixture.h +++ b/src/mongo/db/service_context_d_test_fixture.h @@ -35,7 +35,7 @@ namespace mongo { /** - * Test fixture class for tests that use either the "ephemeralForTest" or "devnull" storage engines. + * Test fixture class for tests that use the "ephemeralForTest" storage engine. */ class ServiceContextMongoDTest : public unittest::Test { public: diff --git a/src/mongo/db/service_context_devnull_test_fixture.cpp b/src/mongo/db/service_context_devnull_test_fixture.cpp new file mode 100644 index 00000000000..cefea8a0053 --- /dev/null +++ b/src/mongo/db/service_context_devnull_test_fixture.cpp @@ -0,0 +1,70 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/service_context_devnull_test_fixture.h" + +#include "mongo/db/client.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/service_context.h" +#include "mongo/db/service_context_d.h" +#include "mongo/db/storage/storage_engine_init.h" +#include "mongo/stdx/memory.h" +#include "mongo/unittest/temp_dir.h" +#include "mongo/util/assert_util.h" + +namespace mongo { + +void ServiceContextDevnullTestFixture::setUp() { + Test::setUp(); + Client::initThread(getThreadName()); + + auto const serviceContext = getServiceContext(); + if (!serviceContext->getStorageEngine()) { + // When using the 'devnull' storage engine, it is fine for the temporary directory to + // go away after the global storage engine is initialized. + unittest::TempDir tempDir("service_context_devnull_test"); + mongo::storageGlobalParams.dbpath = tempDir.path(); + mongo::storageGlobalParams.engine = "devnull"; + mongo::storageGlobalParams.engineSetByUser = true; + createLockFile(serviceContext); + initializeStorageEngine(serviceContext); + } +} + +void ServiceContextDevnullTestFixture::tearDown() { + Client::destroy(); + Test::tearDown(); +} + +ServiceContext* ServiceContextDevnullTestFixture::getServiceContext() { + return getGlobalServiceContext(); +} + +} // namespace mongo diff --git a/src/mongo/db/service_context_devnull_test_fixture.h b/src/mongo/db/service_context_devnull_test_fixture.h new file mode 100644 index 00000000000..77a0d551965 --- /dev/null +++ b/src/mongo/db/service_context_devnull_test_fixture.h @@ -0,0 +1,57 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/unittest/unittest.h" + +namespace mongo { + +class ServiceContext; + +/** + * Test fixture class for mongod tests that use the "devnull" storage engine. + */ +class ServiceContextDevnullTestFixture : public unittest::Test { +public: + /** + * Initializes the devnull engine as the global storage engine. + * Also sets up a Client on the thread, which can be accessed via 'cc()'. + */ + void setUp() override; + + void tearDown() override; + + /** + * Returns a service context, which is only valid for this instance of the test. + * Must not be called before setUp() or after tearDown(). + */ + ServiceContext* getServiceContext(); +}; + +} // namespace mongo diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp index f6cabff5bb1..116a5098a7d 100644 --- a/src/mongo/db/service_entry_point_common.cpp +++ b/src/mongo/db/service_entry_point_common.cpp @@ -70,6 +70,7 @@ #include "mongo/db/s/sharding_state.h" #include "mongo/db/service_entry_point_common.h" #include "mongo/db/session_catalog.h" +#include "mongo/db/snapshot_window_util.h" #include "mongo/db/stats/counters.h" #include "mongo/db/stats/top.h" #include "mongo/rpc/factory.h" @@ -880,6 +881,15 @@ void execCommandDatabase(OperationContext* opCtx, onCannotImplicitlyCreateCollection(opCtx, cannotImplicitCreateCollInfo->getNss()) .ignore(); } + } else if (e.code() == ErrorCodes::SnapshotTooOld) { + // SnapshotTooOld errors indicate that PIT ops are failing to find an available snapshot + // at their specified atClusterTime. Therefore, we'll try to increase the snapshot + // history window that the storage engine maintains in order to increase the likelihood + // of successful future PIT atClusterTime requests. + auto engine = opCtx->getServiceContext()->getStorageEngine(); + if (engine && engine->supportsReadConcernSnapshot()) { + SnapshotWindowUtil::increaseTargetSnapshotWindowSize(opCtx); + } } // Append the error labels for transient transaction errors. diff --git a/src/mongo/db/snapshot_window_options.cpp b/src/mongo/db/snapshot_window_options.cpp new file mode 100644 index 00000000000..3b19de96fb5 --- /dev/null +++ b/src/mongo/db/snapshot_window_options.cpp @@ -0,0 +1,167 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/snapshot_window_options.h" + +#include "mongo/db/server_parameters.h" +#include "mongo/platform/compiler.h" + +namespace mongo { + +SnapshotWindowParams snapshotWindowParams; + +/** + * Provides validation for snapshot window server parameter settings. + */ + +MONGO_COMPILER_VARIABLE_UNUSED auto _exportedMaxTargetSnapshotHistoryWindowInSeconds = + (new ExportedServerParameter( + ServerParameterSet::getGlobal(), + "maxTargetSnapshotHistoryWindowInSeconds", + &snapshotWindowParams.maxTargetSnapshotHistoryWindowInSeconds)) + -> withValidator([](const int32_t& potentialNewValue) { + if (potentialNewValue < 0) { + return Status(ErrorCodes::BadValue, + str::stream() << "maxTargetSnapshotHistoryWindowInSeconds must be " + "greater than or equal to 0. '" + << potentialNewValue + << "' is an invalid setting."); + } + return Status::OK(); + }); + +MONGO_COMPILER_VARIABLE_UNUSED auto _exportedCachePressureThreshold = + (new ExportedServerParameter( + ServerParameterSet::getGlobal(), + "cachePressureThreshold", + &snapshotWindowParams.cachePressureThreshold)) + -> withValidator([](const int32_t& potentialNewValue) { + if (potentialNewValue < 0 || potentialNewValue > 100) { + return Status(ErrorCodes::BadValue, + str::stream() << "cachePressureThreshold must be greater than or " + "equal to 0 and less than or equal to 100. '" + << potentialNewValue + << "' is an invalid setting."); + } + return Status::OK(); + }); + +MONGO_COMPILER_VARIABLE_UNUSED auto _exportedSnapshotWindowMultiplicativeDecrease = + (new ExportedServerParameter( + ServerParameterSet::getGlobal(), + "snapshotWindowMultiplicativeDecrease", + &snapshotWindowParams.snapshotWindowMultiplicativeDecrease)) + -> withValidator([](const double& potentialNewValue) { + if (potentialNewValue <= 0 || potentialNewValue >= 1) { + return Status(ErrorCodes::BadValue, + str::stream() + << "snapshotWindowMultiplicativeDecrease must be greater " + "than 0 and less than 1. '" + << potentialNewValue + << "' is an invalid setting."); + } + + return Status::OK(); + }); + +MONGO_COMPILER_VARIABLE_UNUSED auto _exportedSnapshotWindowAdditiveIncreaseSeconds = + (new ExportedServerParameter( + ServerParameterSet::getGlobal(), + "snapshotWindowAdditiveIncreaseSeconds", + &snapshotWindowParams + .snapshotWindowAdditiveIncreaseSeconds)) -> withValidator([](const int32_t& + potentialNewValue) { + if (potentialNewValue < 1) { + return Status( + ErrorCodes::BadValue, + str::stream() + << "snapshotWindowAdditiveIncreaseSeconds must be greater than or equal to 1. '" + << potentialNewValue + << "' is an invalid setting."); + } + + return Status::OK(); + }); + +MONGO_COMPILER_VARIABLE_UNUSED auto _exportedMinMillisBetweenSnapshotWindowInc = + (new ExportedServerParameter( + ServerParameterSet::getGlobal(), + "minMillisBetweenSnapshotWindowInc", + &snapshotWindowParams.minMillisBetweenSnapshotWindowInc)) + -> withValidator([](const int32_t& potentialNewValue) { + if (potentialNewValue < 1) { + return Status( + ErrorCodes::BadValue, + str::stream() + << "minMillisBetweenSnapshotWindowInc must be greater than or equal to 1. '" + << potentialNewValue + << "' is an invalid setting."); + } + + return Status::OK(); + }); + +MONGO_COMPILER_VARIABLE_UNUSED auto _exportedMinMillisBetweenSnapshotWindowDec = + (new ExportedServerParameter( + ServerParameterSet::getGlobal(), + "minMillisBetweenSnapshotWindowDec", + &snapshotWindowParams.minMillisBetweenSnapshotWindowDec)) + -> withValidator([](const int32_t& potentialNewValue) { + if (potentialNewValue < 1) { + return Status( + ErrorCodes::BadValue, + str::stream() + << "minMillisBetweenSnapshotWindowDec must be greater than or equal to 1. '" + << potentialNewValue + << "' is an invalid setting."); + } + + return Status::OK(); + }); + +MONGO_COMPILER_VARIABLE_UNUSED auto _exportedCheckCachePressurePeriodSeconds = + (new ExportedServerParameter( + ServerParameterSet::getGlobal(), + "checkCachePressurePeriodSeconds", + &snapshotWindowParams.checkCachePressurePeriodSeconds)) + -> withValidator([](const int32_t& potentialNewValue) { + if (potentialNewValue < 1) { + return Status( + ErrorCodes::BadValue, + str::stream() + << "checkCachePressurePeriodSeconds must be greater than or equal to 1. '" + << potentialNewValue + << "' is an invalid setting."); + } + + return Status::OK(); + }); + +} // namespace mongo diff --git a/src/mongo/db/snapshot_window_options.h b/src/mongo/db/snapshot_window_options.h new file mode 100644 index 00000000000..992ff718212 --- /dev/null +++ b/src/mongo/db/snapshot_window_options.h @@ -0,0 +1,108 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#pragma once + +#include "mongo/platform/atomic_proxy.h" +#include "mongo/platform/atomic_word.h" + +namespace mongo { + +/** + * This is a collection of parameters that affect how much snapshot history the storage engine will + * maintain to support point-in-time transactions (read or write). This is referred to as the + * snapshot window. + */ +struct SnapshotWindowParams { + + // maxTargetSnapshotHistoryWindowInSeconds (startup & runtime server paramter, range 0+). + // + // Dictates the maximum lag in seconds oldest_timestamp should be behind stable_timestamp. + // targetSnapshotHistoryWindowInSeconds below is the actual active lag setting target. + // + // Note that the window size can become greater than this if an ongoing operation is holding an + // older snapshot open. + AtomicInt32 maxTargetSnapshotHistoryWindowInSeconds{100}; + + // targetSnapshotHistoryWindowInSeconds (not a server parameter, range 0+). + // + // Dictates the target lag in seconds oldest_timestamp should be set behind stable_timestamp. + // Should only be set in the range [0, maxTargetSnapshotHistoryWindowInSeconds]. + // + // Note that this is the history window we attempt to maintain, but our current system state may + // not always reflect it: the window can only change as more writes come in, so it can take time + // for the actual window size to catch up with a change. This value guides actions whenever the + // system goes to update the oldest_timestamp value. + AtomicInt32 targetSnapshotHistoryWindowInSeconds{ + maxTargetSnapshotHistoryWindowInSeconds.load()}; + + // cachePressureThreshold (startup & runtime server paramter, range [0, 100]). + // + // Dictates what percentage of cache in use is considered too high. This setting helps preempt + // storage cache pressure immobilizing the system. Attempts to increase + // targetSnapshotHistoryWindowInSeconds will be ignored when the cache pressure reaches this + // threshold. Additionally, a periodic task will decrease targetSnapshotHistoryWindowInSeconds + // when cache pressure exceeds the threshold. + AtomicInt32 cachePressureThreshold{50}; + + // snapshotWindowMultiplicativeDecrease (startup & runtime server paramter, range (0,1)). + // + // Controls by what multiplier the target snapshot history window setting is decreased when + // cache pressure becomes too high, per the cachePressureThreshold setting. + AtomicDouble snapshotWindowMultiplicativeDecrease{0.75}; + + // snapshotWindowAdditiveIncreaseSeconds (startup & runtime server paramter, range 1+). + // + // Controls by how much the target snapshot history window setting is increased when cache + // pressure is OK, per cachePressureThreshold, and we need to service older snapshots for global + // point-in-time reads. + AtomicInt32 snapshotWindowAdditiveIncreaseSeconds{2}; + + // minMillisBetweenSnapshotWindowInc (startup & runtime server paramter, range 0+). + // minMillisBetweenSnapshotWindowDec (startup & runtime server paramter, range 0+). + // + // Controls how often attempting to increase/decrease the target snapshot window will have an + // effect. Multiple callers within minMillisBetweenSnapshotWindowInc will have the same effect + // as one. This protects the system because it takes time for the target snapshot window to + // affect the actual storage engine snapshot window. The stable timestamp must move forward for + // the window between it and oldest timestamp to grow or shrink. + AtomicInt32 minMillisBetweenSnapshotWindowInc{500}; + AtomicInt32 minMillisBetweenSnapshotWindowDec{500}; + + // checkCachePressurePeriodSeconds (startup & runtime server paramter, range 1+) + // + // Controls the period of the task that checks for cache pressure and decreases + // targetSnapshotHistoryWindowInSeconds if the pressure is above cachePressureThreshold. The + // target window size setting must not be decreased too fast because time must be allowed for + // the storage engine to attempt to act on the new setting. + AtomicInt32 checkCachePressurePeriodSeconds{5}; +}; + +extern SnapshotWindowParams snapshotWindowParams; + +} // namespace mongo diff --git a/src/mongo/db/snapshot_window_util.cpp b/src/mongo/db/snapshot_window_util.cpp new file mode 100644 index 00000000000..d364187118a --- /dev/null +++ b/src/mongo/db/snapshot_window_util.cpp @@ -0,0 +1,147 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage + +#include "mongo/platform/basic.h" + +#include "mongo/db/snapshot_window_util.h" + +#include "mongo/db/operation_context.h" +#include "mongo/db/service_context.h" +#include "mongo/db/snapshot_window_options.h" +#include "mongo/db/storage/storage_engine.h" +#include "mongo/stdx/mutex.h" +#include "mongo/util/concurrency/with_lock.h" +#include "mongo/util/log.h" + +namespace mongo { + +namespace SnapshotWindowUtil { + +// Adds concurrency control to increaseTargetSnapshotWindowSize() and +// decreaseTargetSnapshotWindowSize(). They should not run concurrently with themselves or one +// another, since they act on and modify the same storage parameters. Further guards the static +// variables "_snapshotWindowLastDecreasedAt" and "_snapshotWindowLastIncreasedAt" used in +// increaseTargetSnapshotWindowSize() and decreaseSnapshowWindow(). +stdx::mutex snapshotWindowMutex; + +namespace { + +void _decreaseTargetSnapshotWindowSize(WithLock lock, OperationContext* opCtx) { + // Tracks the last time that the snapshot window was decreased so that it does not go down so + // fast that the system does not have time to react and reduce snapshot availability. + static Date_t _snapshotWindowLastDecreasedAt{Date_t::min()}; + + if (_snapshotWindowLastDecreasedAt > + (Date_t::now() - + Milliseconds(snapshotWindowParams.minMillisBetweenSnapshotWindowDec.load()))) { + // We have already decreased the window size in the last minMillisBetweenSnapshotWindowDec + // milliseconds. + return; + } + + snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.store( + snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load() * + snapshotWindowParams.snapshotWindowMultiplicativeDecrease.load()); + + // Try to set the oldest_timestamp immediately without waiting for a write to adjust the + // window. May or may not work depending on the state of the system. + StorageEngine* engine = opCtx->getServiceContext()->getStorageEngine(); + invariant(engine); + engine->setOldestTimestampFromStable(); + + _snapshotWindowLastDecreasedAt = Date_t::now(); +} + +} // namespace + +void increaseTargetSnapshotWindowSize(OperationContext* opCtx) { + stdx::unique_lock lock(snapshotWindowMutex); + + // Tracks the last time that the snapshot window was increased so that it does not go up so fast + // that the storage engine does not have time to improve snapshot availability. + static Date_t _snapshotWindowLastIncreasedAt{Date_t::min()}; + + if (_snapshotWindowLastIncreasedAt > + (Date_t::now() - + Milliseconds(snapshotWindowParams.minMillisBetweenSnapshotWindowInc.load()))) { + // We have already increased the window size in the last minMillisBetweenSnapshotWindowInc + // milliseconds. + return; + } + + // If the cache pressure is already too high, we will not put more pressure on it by increasing + // the window size. + StorageEngine* engine = opCtx->getServiceContext()->getStorageEngine(); + if (engine && engine->isCacheUnderPressure(opCtx)) { + warning() << "Attempted to increase the time window of available snapshots for " + "point-in-time operations (readConcern level 'snapshot' or transactions), but " + "the storage engine cache pressure, per the cachePressureThreshold setting of " + "'" + << snapshotWindowParams.cachePressureThreshold.load() + << "', is too high to allow it to increase. If this happens frequently, consider " + "either increasing the cache pressure threshold or increasing the memory " + "available to the storage engine cache, in order to improve the success rate " + "or speed of point-in-time requests."; + _decreaseTargetSnapshotWindowSize(lock, opCtx); + return; + } + + if (snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load() == + snapshotWindowParams.maxTargetSnapshotHistoryWindowInSeconds.load()) { + warning() << "Attempted to increase the time window of available snapshots for " + "point-in-time operations (readConcern level 'snapshot' or transactions), but " + "maxTargetSnapshotHistoryWindowInSeconds has already been reached. If this " + "happens frequently, consider increasing the " + "maxTargetSnapshotHistoryWindowInSeconds setting value, which is currently " + "set to '" + << snapshotWindowParams.maxTargetSnapshotHistoryWindowInSeconds.load() << "'."; + return; + } + + int increasedSnapshotWindow = snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load() + + snapshotWindowParams.snapshotWindowAdditiveIncreaseSeconds.load(); + snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.store( + std::min(increasedSnapshotWindow, + snapshotWindowParams.maxTargetSnapshotHistoryWindowInSeconds.load())); + + _snapshotWindowLastIncreasedAt = Date_t::now(); +} + +void decreaseTargetSnapshotWindowSize(OperationContext* opCtx) { + stdx::unique_lock lock(snapshotWindowMutex); + + StorageEngine* engine = opCtx->getServiceContext()->getStorageEngine(); + if (engine && engine->isCacheUnderPressure(opCtx)) { + _decreaseTargetSnapshotWindowSize(lock, opCtx); + } +} + +} // namespace SnapshotWindowUtil +} // namespace mongo diff --git a/src/mongo/db/snapshot_window_util.h b/src/mongo/db/snapshot_window_util.h new file mode 100644 index 00000000000..f78c5005460 --- /dev/null +++ b/src/mongo/db/snapshot_window_util.h @@ -0,0 +1,77 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#pragma once + +namespace mongo { + +class OperationContext; + +/** + * Function helpers to manipulate parameters affecting the snapshot time window size that the + * storage engine maintains of available snapshots for point-in-time reads. + */ +namespace SnapshotWindowUtil { + +/** + * Attempts to increase the setting that controls the window of time between stable_timestamp and + * oldest_timestamp, in order to provide a greater range of available snapshots for point-in-time + * operations. The window will not be increased, however, if the cache pressure is currently too + * high. This function will be called when server requests return SnapshotTooOld (or similar) + * errors. Note that this will not immediately affect the oldest_timestamp. Rather, it affects + * actions taken next time oldest_timestamp is updated, usually when the stable timestamp is + * advanced. + * + * Implements an additive increase algorithm. + * + * Calling many times all at once has the same effect as calling once. The last update time is + * tracked and attempts to increase the window are limited to once in + * minMillisBetweenSnapshotWindowInc. This is to protect against a sudden wave of function calls due + * to simultaneous SnapshotTooOld errors. Some time must be allowed for the increased target + * snapshot window size to have an effect. The target size can also never exceed + * maxTargetSnapshotHistoryWindowInSeconds. + */ +void increaseTargetSnapshotWindowSize(OperationContext* opCtx); + +/** + * Attempts to decrease (if not already zero) the setting that affects the size of the window of + * time between stable_timestamp and oldest_timestamp in order to reduce storage engine cache + * pressure. The window target will not be decreased, however, if the cache is not currently under + * pressure. Pressure can occur when too much history is being maintained for point-in-time + * snapshots. Note that this will not necessarily immediately affect the actual window size; rather, + * it affects actions taken whenever oldest_timestamp is updated, usually when the stable timestamp + * is advanced. + * + * This will make one attempt to immediately adjust the window size if possible. + * + * Implements a multiplicative decrease algorithm. + */ +void decreaseTargetSnapshotWindowSize(OperationContext* opCtx); + +} // namespace SnapshotWindowUtil +} // namespace mongo diff --git a/src/mongo/db/snapshot_window_util_test.cpp b/src/mongo/db/snapshot_window_util_test.cpp new file mode 100644 index 00000000000..d668250f5d9 --- /dev/null +++ b/src/mongo/db/snapshot_window_util_test.cpp @@ -0,0 +1,156 @@ +/** + * Copyright (C) 2018 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/snapshot_window_util.h" + +#include "mongo/db/client.h" +#include "mongo/db/service_context.h" +#include "mongo/db/service_context_devnull_test_fixture.h" +#include "mongo/db/snapshot_window_options.h" +#include "mongo/db/storage/storage_options.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { +namespace { + +using namespace SnapshotWindowUtil; + +/** + * Tests the functions in snapshot_window_util.h using a devnull storage engine. + */ +class SnapshotWindowTest : public ServiceContextDevnullTestFixture { +public: + void setUp() override { + ServiceContextDevnullTestFixture::setUp(); + _opCtx = cc().makeOperationContext(); + } + + void tearDown() override { + _opCtx.reset(); + ServiceContextDevnullTestFixture::tearDown(); + } + + ServiceContext::UniqueOperationContext _opCtx; +}; + +TEST_F(SnapshotWindowTest, DecreaseAndIncreaseSnapshotWindow) { + auto engine = getServiceContext()->getStorageEngine(); + invariant(engine); + + // Lower the time enforced between function calls to speed up testing. + // Dec must match Inc b/c increaseTargetWindowSize can call into decreaseTargetWindowSize. + snapshotWindowParams.minMillisBetweenSnapshotWindowInc.store(100); + snapshotWindowParams.minMillisBetweenSnapshotWindowDec.store(100); + + auto maxTargetSnapshotWindowSeconds = + snapshotWindowParams.maxTargetSnapshotHistoryWindowInSeconds.load(); + auto snapshotWindowSeconds = snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load(); + ASSERT_EQ(maxTargetSnapshotWindowSeconds, snapshotWindowSeconds); + + auto windowMultiplicativeDecrease = + snapshotWindowParams.snapshotWindowMultiplicativeDecrease.load(); + auto windowAdditiveIncrease = snapshotWindowParams.snapshotWindowAdditiveIncreaseSeconds.load(); + + auto cachePressureThreshold = snapshotWindowParams.cachePressureThreshold.load(); + auto minTimeBetweenInc = snapshotWindowParams.minMillisBetweenSnapshotWindowInc.load(); + + /** + * Test that decreasing the size succeeds when cache pressure is ABOVE the threshold + */ + + engine->setCachePressureForTest(cachePressureThreshold + 5); + + decreaseTargetSnapshotWindowSize(_opCtx.get()); + auto snapshotWindowSecondsOne = + snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load(); + + ASSERT_GT(snapshotWindowSeconds, snapshotWindowSecondsOne); + ASSERT_EQ(snapshotWindowSecondsOne, + static_cast(snapshotWindowSeconds * windowMultiplicativeDecrease)); + + /** + * Test that increasing the size SUCCEEDS when the cache pressure is BELOW the threshold. + */ + + engine->setCachePressureForTest(cachePressureThreshold - 5); + + increaseTargetSnapshotWindowSize(_opCtx.get()); + auto snapshotWindowSecondsTwo = + snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load(); + + ASSERT_EQ(snapshotWindowSecondsTwo, snapshotWindowSecondsOne + windowAdditiveIncrease); + + /** + * Test that increasing the size FAILS when the cache pressure is ABOVE the threshold, and + * instead this causes the size to be decreased. + */ + + engine->setCachePressureForTest(cachePressureThreshold + 5); + + // Sleep for a time because increaseTargetSnapshotWindowSize() enforces a wait time between + // updates. + sleepmillis(2 * minTimeBetweenInc); + + increaseTargetSnapshotWindowSize(_opCtx.get()); + auto snapshotWindowSecondsThree = + snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load(); + + ASSERT_EQ(snapshotWindowSecondsThree, + static_cast(snapshotWindowSecondsTwo * windowMultiplicativeDecrease)); + + engine->setCachePressureForTest(cachePressureThreshold - 5); + + /** + * Test that the size cannot be increased above the maximum size. + */ + + // Integers round down, so add 1 to make sure it reaches the max. + int numIncreasesToReachMax = + (maxTargetSnapshotWindowSeconds - snapshotWindowSecondsThree) / windowAdditiveIncrease + 1; + for (int i = 0; i < numIncreasesToReachMax; ++i) { + sleepmillis(2 * minTimeBetweenInc); + increaseTargetSnapshotWindowSize(_opCtx.get()); + } + + // Should be at max. + auto snapshotWindowSecondsFour = + snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load(); + ASSERT_EQ(snapshotWindowSecondsFour, maxTargetSnapshotWindowSeconds); + + // An attempt to increase beyond max should have no effect. + sleepmillis(2 * minTimeBetweenInc); + increaseTargetSnapshotWindowSize(_opCtx.get()); + auto snapshotWindowSecondsFive = + snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load(); + ASSERT_EQ(snapshotWindowSecondsFive, maxTargetSnapshotWindowSeconds); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/storage/devnull/SConscript b/src/mongo/db/storage/devnull/SConscript index 1d960936a60..fa34cf41f83 100644 --- a/src/mongo/db/storage/devnull/SConscript +++ b/src/mongo/db/storage/devnull/SConscript @@ -13,6 +13,9 @@ env.Library( '$BUILD_DIR/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store', '$BUILD_DIR/mongo/db/storage/kv/kv_prefix', ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/snapshot_window_options', + ], ) env.Library( diff --git a/src/mongo/db/storage/devnull/devnull_kv_engine.cpp b/src/mongo/db/storage/devnull/devnull_kv_engine.cpp index 8e158dbc3ea..0fdb22b1d12 100644 --- a/src/mongo/db/storage/devnull/devnull_kv_engine.cpp +++ b/src/mongo/db/storage/devnull/devnull_kv_engine.cpp @@ -31,6 +31,7 @@ #include "mongo/db/storage/devnull/devnull_kv_engine.h" #include "mongo/base/disallow_copying.h" +#include "mongo/db/snapshot_window_options.h" #include "mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.h" #include "mongo/db/storage/record_store.h" #include "mongo/db/storage/sorted_data_interface.h" @@ -258,4 +259,14 @@ SortedDataInterface* DevNullKVEngine::getSortedDataInterface(OperationContext* o const IndexDescriptor* desc) { return new DevNullSortedDataInterface(); } + +bool DevNullKVEngine::isCacheUnderPressure(OperationContext* opCtx) const { + return (_cachePressureForTest >= snapshotWindowParams.cachePressureThreshold.load()); +} + +void DevNullKVEngine::setCachePressureForTest(int pressure) { + invariant(pressure >= 0 && pressure <= 100); + _cachePressureForTest = pressure; } + +} // namespace mongo diff --git a/src/mongo/db/storage/devnull/devnull_kv_engine.h b/src/mongo/db/storage/devnull/devnull_kv_engine.h index 5e8af42c74d..df356cfaae1 100644 --- a/src/mongo/db/storage/devnull/devnull_kv_engine.h +++ b/src/mongo/db/storage/devnull/devnull_kv_engine.h @@ -39,6 +39,9 @@ namespace mongo { class JournalListener; +/** + * The devnull storage engine is intended for unit and performance testing. + */ class DevNullKVEngine : public KVEngine { public: virtual ~DevNullKVEngine() {} @@ -92,6 +95,10 @@ public: return true; } + virtual bool isCacheUnderPressure(OperationContext* opCtx) const override; + + virtual void setCachePressureForTest(int pressure) override; + virtual int64_t getIdentSize(OperationContext* opCtx, StringData ident) { return 1; } @@ -118,5 +125,7 @@ public: private: std::shared_ptr _catalogInfo; + + int _cachePressureForTest; }; } diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h index f7b79356a04..65a9a281396 100644 --- a/src/mongo/db/storage/kv/kv_engine.h +++ b/src/mongo/db/storage/kv/kv_engine.h @@ -260,10 +260,27 @@ public: */ virtual void setInitialDataTimestamp(Timestamp initialDataTimestamp) {} + /** + * See `StorageEngine::setOldestTimestampFromStable` + */ + virtual void setOldestTimestampFromStable() {} + /** * See `StorageEngine::setOldestTimestamp` */ - virtual void setOldestTimestamp(Timestamp oldestTimestamp) {} + virtual void setOldestTimestamp(Timestamp newOldestTimestamp) {} + + /** + * See `StorageEngine::isCacheUnderPressure()` + */ + virtual bool isCacheUnderPressure(OperationContext* opCtx) const { + return false; + } + + /** + * See 'StorageEngine::setCachePressureForTest()' + */ + virtual void setCachePressureForTest(int pressure) {} /** * See `StorageEngine::supportsRecoverToStableTimestamp` diff --git a/src/mongo/db/storage/kv/kv_engine_test_harness.h b/src/mongo/db/storage/kv/kv_engine_test_harness.h index 64edb26da44..5191a2d344c 100644 --- a/src/mongo/db/storage/kv/kv_engine_test_harness.h +++ b/src/mongo/db/storage/kv/kv_engine_test_harness.h @@ -37,6 +37,14 @@ namespace mongo { class ClockSource; +/** + * Creates a harness for generic KVEngine testing of all KVEngine implementations. + * + * A particular KVHarnessHelper implementation (with a particular KVEngine implementation) will + * implement registerFactory() and create() such that generic unit tests can create() and test the + * particular KVHarnessHelper implementation. This library can be pulled into a particular + * implementation's CppUnitTest to exercise the generic test coverage on that implementation. + */ class KVHarnessHelper { public: virtual ~KVHarnessHelper() {} diff --git a/src/mongo/db/storage/kv/kv_storage_engine.cpp b/src/mongo/db/storage/kv/kv_storage_engine.cpp index 58f868d4a8f..d954893a2a7 100644 --- a/src/mongo/db/storage/kv/kv_storage_engine.cpp +++ b/src/mongo/db/storage/kv/kv_storage_engine.cpp @@ -509,8 +509,20 @@ void KVStorageEngine::setInitialDataTimestamp(Timestamp initialDataTimestamp) { _engine->setInitialDataTimestamp(initialDataTimestamp); } -void KVStorageEngine::setOldestTimestamp(Timestamp oldestTimestamp) { - _engine->setOldestTimestamp(oldestTimestamp); +void KVStorageEngine::setOldestTimestampFromStable() { + _engine->setOldestTimestampFromStable(); +} + +void KVStorageEngine::setOldestTimestamp(Timestamp newOldestTimestamp) { + _engine->setOldestTimestamp(newOldestTimestamp); +} + +bool KVStorageEngine::isCacheUnderPressure(OperationContext* opCtx) const { + return _engine->isCacheUnderPressure(opCtx); +} + +void KVStorageEngine::setCachePressureForTest(int pressure) { + return _engine->setCachePressureForTest(pressure); } bool KVStorageEngine::supportsRecoverToStableTimestamp() const { diff --git a/src/mongo/db/storage/kv/kv_storage_engine.h b/src/mongo/db/storage/kv/kv_storage_engine.h index 79ae83adf52..6ffe83b5210 100644 --- a/src/mongo/db/storage/kv/kv_storage_engine.h +++ b/src/mongo/db/storage/kv/kv_storage_engine.h @@ -117,7 +117,13 @@ public: virtual void setInitialDataTimestamp(Timestamp initialDataTimestamp) override; - virtual void setOldestTimestamp(Timestamp oldestTimestamp) override; + virtual void setOldestTimestampFromStable() override; + + virtual void setOldestTimestamp(Timestamp newOldestTimestamp) override; + + virtual bool isCacheUnderPressure(OperationContext* opCtx) const override; + + virtual void setCachePressureForTest(int pressure) override; virtual bool supportsRecoverToStableTimestamp() const override; diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h index 64f19f5dadf..99cc8ebd76f 100644 --- a/src/mongo/db/storage/storage_engine.h +++ b/src/mongo/db/storage/storage_engine.h @@ -370,11 +370,40 @@ public: */ virtual void setInitialDataTimestamp(Timestamp timestamp) {} + /** + * Uses the current stable timestamp to set the oldest timestamp for which the storage engine + * must maintain snapshot history through. + * + * oldest_timestamp will be set to stable_timestamp adjusted by + * 'targetSnapshotHistoryWindowInSeconds' to create a window of available snapshots on the + * storage engine from oldest to stable. Furthermore, oldest_timestamp will never be set ahead + * of the oplog read timestamp, ensuring the oplog reader's 'read_timestamp' can always be + * serviced. + */ + virtual void setOldestTimestampFromStable() {} + /** * Sets the oldest timestamp for which the storage engine must maintain snapshot history * through. Additionally, all future writes must be newer or equal to this value. */ - virtual void setOldestTimestamp(Timestamp timestampa) {} + virtual void setOldestTimestamp(Timestamp timestamp) {} + + /** + * Indicates whether the storage engine cache is under pressure. + * + * Retrieves a cache pressure value in the range [0, 100] from the storage engine, and compares + * it against storageGlobalParams.cachePressureThreshold, a dynamic server parameter, to + * determine whether cache pressure is too high. + */ + virtual bool isCacheUnderPressure(OperationContext* opCtx) const { + return false; + } + + /** + * For unit tests only. Sets the cache pressure value with which isCacheUnderPressure() + * evalutates to 'pressure'. + */ + virtual void setCachePressureForTest(int pressure) {} /** * Notifies the storage engine that a replication batch has completed. diff --git a/src/mongo/db/storage/test_harness_helper.h b/src/mongo/db/storage/test_harness_helper.h index 8ce241bcb3e..c0891f0235d 100644 --- a/src/mongo/db/storage/test_harness_helper.h +++ b/src/mongo/db/storage/test_harness_helper.h @@ -43,6 +43,15 @@ #include "mongo/util/unowned_ptr.h" namespace mongo { + +/** + * Sets up an OperationContext with a Recovery Unit. Uses a ServiceContextNoop. + * + * A particular HarnessHelper implementation will implement registerHarnessHelperFactory() and + * newHarnessHelper() such that generic unit tests can create and test that particular + * HarnessHelper implementation. The newRecoveryUnit() implementation dictates what RecoveryUnit + * implementation the OperationContext has. + */ class HarnessHelper { public: virtual ~HarnessHelper() = 0; @@ -72,9 +81,9 @@ public: return &_serviceContext; } -private: virtual std::unique_ptr newRecoveryUnit() = 0; +private: ServiceContextNoop _serviceContext; ServiceContext::UniqueClient _client = _serviceContext.makeClient("hh"); }; diff --git a/src/mongo/db/storage/wiredtiger/SConscript b/src/mongo/db/storage/wiredtiger/SConscript index aee0fb5d8a5..9d42061472d 100644 --- a/src/mongo/db/storage/wiredtiger/SConscript +++ b/src/mongo/db/storage/wiredtiger/SConscript @@ -80,6 +80,7 @@ if wiredtiger: 'storage_wiredtiger_customization_hooks', ], LIBDEPS_PRIVATE= [ + '$BUILD_DIR/mongo/db/snapshot_window_options', '$BUILD_DIR/mongo/util/options_parser/options_parser', ], ) diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 5a5c200abaa..483d50ea9df 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -65,6 +65,7 @@ #include "mongo/db/server_options.h" #include "mongo/db/server_parameters.h" #include "mongo/db/service_context.h" +#include "mongo/db/snapshot_window_options.h" #include "mongo/db/storage/journal_listener.h" #include "mongo/db/storage/storage_options.h" #include "mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.h" @@ -1153,11 +1154,6 @@ bool WiredTigerKVEngine::initRsOplogBackgroundThread(StringData ns) { return initRsOplogBackgroundThreadCallback(ns); } -void WiredTigerKVEngine::setOldestTimestamp(Timestamp oldestTimestamp) { - constexpr bool doForce = true; - _setOldestTimestamp(oldestTimestamp, doForce); -} - namespace { MONGO_FP_DECLARE(WTPreserveSnapshotHistoryIndefinitely); @@ -1196,35 +1192,58 @@ void WiredTigerKVEngine::setStableTimestamp(Timestamp stableTimestamp) { } invariant(static_cast(size) < sizeof(stableTSConfigString)); invariantWTOK(_conn->set_timestamp(_conn, stableTSConfigString)); + { + stdx::lock_guard lock(_stableTimestampMutex); + // set_timestamp above ignores backwards in time without force. + if (_stableTimestamp < stableTimestamp) { + _stableTimestamp = stableTimestamp; + } + } if (_checkpointThread) { _checkpointThread->setStableTimestamp(stableTimestamp); } - // Communicate to WiredTiger that it can clean up timestamp data earlier than the timestamp - // provided. No future queries will need point-in-time reads at a timestamp prior to the one - // provided here. + // Forward the oldest timestamp so that WiredTiger can clean up earlier timestamp data. if (!MONGO_FAIL_POINT(WTPreserveSnapshotHistoryIndefinitely)) { - _setOldestTimestamp(stableTimestamp); + setOldestTimestampFromStable(); } } -void WiredTigerKVEngine::_setOldestTimestamp(Timestamp oldestTimestamp, bool force) { +void WiredTigerKVEngine::setOldestTimestampFromStable() { + Timestamp stableTimestamp; + { + stdx::lock_guard lock(_stableTimestampMutex); + stableTimestamp = _stableTimestamp; + } - if (oldestTimestamp == Timestamp()) { - // Nothing to set yet. + // Calculate what the oldest_timestamp should be from the stable_timestamp. The oldest + // timestamp should lag behind stable by 'targetSnapshotHistoryWindowInSeconds' to create a + // window of available snapshots. If the lag window is not yet large enough, we will not + // update/forward the oldest_timestamp yet and instead return early. + Timestamp newOldestTimestamp = _calculateHistoryLagFromStableTimestamp(stableTimestamp); + if (newOldestTimestamp.isNull()) { return; } + const auto oplogReadTimestamp = Timestamp(_oplogManager->getOplogReadTimestamp()); - if (!force && !oplogReadTimestamp.isNull() && oldestTimestamp > oplogReadTimestamp) { + if (!oplogReadTimestamp.isNull() && newOldestTimestamp > oplogReadTimestamp) { // Oplog visibility is updated asynchronously from replication updating the commit point. // When force is not set, lag the `oldest_timestamp` to the possibly stale oplog read // timestamp value. This guarantees an oplog reader's `read_timestamp` can always // be serviced. When force is set, we respect the caller's request and do not lag the // oldest timestamp. - oldestTimestamp = oplogReadTimestamp; + newOldestTimestamp = oplogReadTimestamp; } + _setOldestTimestamp(newOldestTimestamp, false); +} + +void WiredTigerKVEngine::setOldestTimestamp(Timestamp newOldestTimestamp) { + _setOldestTimestamp(newOldestTimestamp, true); +} + +void WiredTigerKVEngine::_setOldestTimestamp(Timestamp newOldestTimestamp, bool force) { char oldestTSConfigString["force=true,oldest_timestamp=,commit_timestamp="_sd.size() + (2 * 8 * 2) /* 2 timestamps of 16 hexadecimal digits each */ + 1 /* trailing null */]; @@ -1233,13 +1252,13 @@ void WiredTigerKVEngine::_setOldestTimestamp(Timestamp oldestTimestamp, bool for size = std::snprintf(oldestTSConfigString, sizeof(oldestTSConfigString), "force=true,oldest_timestamp=%llx,commit_timestamp=%llx", - oldestTimestamp.asULL(), - oldestTimestamp.asULL()); + newOldestTimestamp.asULL(), + newOldestTimestamp.asULL()); } else { size = std::snprintf(oldestTSConfigString, sizeof(oldestTSConfigString), "oldest_timestamp=%llx", - oldestTimestamp.asULL()); + newOldestTimestamp.asULL()); } if (size < 0) { int e = errno; @@ -1248,14 +1267,51 @@ void WiredTigerKVEngine::_setOldestTimestamp(Timestamp oldestTimestamp, bool for } invariant(static_cast(size) < sizeof(oldestTSConfigString)); invariantWTOK(_conn->set_timestamp(_conn, oldestTSConfigString)); + { + stdx::lock_guard lock(_oldestTimestampMutex); + // set_timestamp above ignores backwards in time without force. + if (force) { + _oldestTimestamp = newOldestTimestamp; + } else if (_oldestTimestamp < newOldestTimestamp) { + _oldestTimestamp = newOldestTimestamp; + } + } if (force) { - LOG(2) << "oldest_timestamp and commit_timestamp force set to " << oldestTimestamp; + LOG(2) << "oldest_timestamp and commit_timestamp force set to " << newOldestTimestamp; } else { - LOG(2) << "oldest_timestamp set to " << oldestTimestamp; + LOG(2) << "oldest_timestamp set to " << newOldestTimestamp; } } +Timestamp WiredTigerKVEngine::_calculateHistoryLagFromStableTimestamp(Timestamp stableTimestamp) { + + // The oldest_timestamp should lag behind the stable_timestamp by + // 'targetSnapshotHistoryWindowInSeconds' seconds. + + if (stableTimestamp.getSecs() < + static_cast(snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load())) { + // The history window is larger than the timestamp history thus far. We must wait for + // the history to reach the window size before moving oldest_timestamp forward. + return Timestamp(); + } + + Timestamp calculatedOldestTimestamp( + stableTimestamp.getSecs() - + snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load(), + stableTimestamp.getInc()); + { + stdx::lock_guard lock(_oldestTimestampMutex); + if (calculatedOldestTimestamp <= _oldestTimestamp) { + // The stable_timestamp is not far enough ahead of the oldest_timestamp for the + // oldest_timestamp to be moved forward: the window is still too small. + return Timestamp(); + } + } + + return calculatedOldestTimestamp; +} + void WiredTigerKVEngine::setInitialDataTimestamp(Timestamp initialDataTimestamp) { if (_checkpointThread) { _checkpointThread->setInitialDataTimestamp(initialDataTimestamp); @@ -1380,4 +1436,24 @@ void WiredTigerKVEngine::replicationBatchIsComplete() const { _oplogManager->triggerJournalFlush(); } +bool WiredTigerKVEngine::isCacheUnderPressure(OperationContext* opCtx) const { + WiredTigerSession* session = WiredTigerRecoveryUnit::get(opCtx)->getSessionNoTxn(); + invariant(session); + + int64_t score = uassertStatusOK(WiredTigerUtil::getStatisticsValueAs( + session->getSession(), "statistics:", "", WT_STAT_CONN_CACHE_LOOKASIDE_SCORE)); + + return (score >= snapshotWindowParams.cachePressureThreshold.load()); +} + +Timestamp WiredTigerKVEngine::getStableTimestamp() const { + stdx::lock_guard lock(_stableTimestampMutex); + return _stableTimestamp; +} + +Timestamp WiredTigerKVEngine::getOldestTimestamp() const { + stdx::lock_guard lock(_oldestTimestampMutex); + return _oldestTimestamp; +} + } // namespace mongo diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h index 98806dd4222..6d62e8663b7 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h @@ -180,11 +180,9 @@ public: virtual void setInitialDataTimestamp(Timestamp initialDataTimestamp) override; - /** - * This method will force the oldest timestamp to the input value. Callers must be serialized - * along with `setStableTimestamp` - */ - void setOldestTimestamp(Timestamp oldestTimestamp); + virtual void setOldestTimestampFromStable() override; + + virtual void setOldestTimestamp(Timestamp newOldestTimestamp) override; virtual bool supportsRecoverToStableTimestamp() const override; @@ -254,7 +252,7 @@ public: /** * Sets the implementation for `initRsOplogBackgroundThread` (allowing tests to skip the - * background job, for example). Intended to be called from a MONGO_INITIALIZER and therefroe in + * background job, for example). Intended to be called from a MONGO_INITIALIZER and therefore in * a single threaded context. */ static void setInitRsOplogBackgroundThreadCallback(stdx::function cb); @@ -269,6 +267,15 @@ public: static void appendGlobalStats(BSONObjBuilder& b); + bool isCacheUnderPressure(OperationContext* opCtx) const override; + + /** + * These are timestamp access functions for serverStatus to be able to report the actual + * snapshot window size. + */ + Timestamp getStableTimestamp() const; + Timestamp getOldestTimestamp() const; + private: class WiredTigerJournalFlusher; class WiredTigerCheckpointThread; @@ -280,7 +287,22 @@ private: std::string _uri(StringData ident) const; - void _setOldestTimestamp(Timestamp oldestTimestamp, bool force = false); + /** + * Uses the 'stableTimestamp', the 'targetSnapshotHistoryWindowInSeconds' setting and the + * current _oldestTimestamp to calculate what the new oldest_timestamp should be, in order to + * maintain a window of available snapshots on the storage engine from oldest to stable + * timestamp. + * + * If the returned Timestamp isNull(), oldest_timestamp should not be moved forward. + */ + Timestamp _calculateHistoryLagFromStableTimestamp(Timestamp stableTimestamp); + + /** + * Sets the oldest timestamp for which the storage engine must maintain snapshot history + * through. If force is true, oldest will be set to the given input value, unmodified, even if + * it is backwards in time from the last oldest timestamp (accomodating initial sync). + */ + void _setOldestTimestamp(Timestamp newOldestTimestamp, bool force); WT_CONNECTION* _conn; WT_EVENT_HANDLER _eventHandler; @@ -319,5 +341,13 @@ private: std::unique_ptr _backupSession; Timestamp _recoveryTimestamp; WiredTigerFileVersion _fileVersion; + + // Ensures accesses to _oldestTimestamp and _stableTimestamp, respectively, are multi-core safe. + mutable stdx::mutex _oldestTimestampMutex; + mutable stdx::mutex _stableTimestampMutex; + + // Tracks the stable and oldest timestamps we've set on the storage engine. + Timestamp _oldestTimestamp; + Timestamp _stableTimestamp; }; } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp index dce16581223..75dc1b4d029 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp @@ -105,6 +105,10 @@ public: return std::move(ret); } + WiredTigerKVEngine* getEngine() { + return &_engine; + } + private: unittest::TempDir _dbpath; ClockSourceMock _cs; @@ -142,14 +146,14 @@ public: } void setUp() override { - harnessHelper = newRecoveryUnitHarnessHelper(); + harnessHelper = std::make_unique(); clientAndCtx1 = makeClientAndOpCtx(harnessHelper.get(), "writer"); clientAndCtx2 = makeClientAndOpCtx(harnessHelper.get(), "reader"); ru1 = checked_cast(clientAndCtx1.second->recoveryUnit()); ru2 = checked_cast(clientAndCtx2.second->recoveryUnit()); } - std::unique_ptr harnessHelper; + std::unique_ptr harnessHelper; ClientAndCtx clientAndCtx1, clientAndCtx2; WiredTigerRecoveryUnit *ru1, *ru2; @@ -163,6 +167,50 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, SetReadSource) { ASSERT_EQ(RecoveryUnit::ReadSource::kProvided, ru1->getTimestampReadSource()); ASSERT_EQ(Timestamp(1, 1), ru1->getPointInTimeReadTimestamp()); } + +TEST_F(WiredTigerRecoveryUnitTestFixture, CreateAndCheckForCachePressure) { + int time = 1; + + // Reconfigure the size of the cache to be very small so that building cache pressure is fast. + WiredTigerKVEngine* engine = harnessHelper->getEngine(); + std::string cacheSizeReconfig = "cache_size=1MB"; + ASSERT_EQ(engine->reconfigure(cacheSizeReconfig.c_str()), 0); + + OperationContext* opCtx = clientAndCtx1.second.get(); + std::unique_ptr rs(harnessHelper->createRecordStore(opCtx, "a.b")); + + // Insert one document so that we can then update it in a loop to create cache pressure. + // Note: inserts will not create cache pressure. + WriteUnitOfWork wu(opCtx); + ASSERT_OK(ru1->setTimestamp(Timestamp(time++))); + std::string str = str::stream() << "foobarbaz"; + StatusWith ress = + rs->insertRecord(opCtx, str.c_str(), str.size() + 1, Timestamp(), false); + ASSERT_OK(ress.getStatus()); + auto recordId = ress.getValue(); + wu.commit(); + + for (int j = 0; j < 1000; ++j) { + // Once we hit the cache pressure threshold, i.e. have successfully created cache pressure + // that is detectable, we are done. + if (engine->isCacheUnderPressure(opCtx)) { + invariant(j != 0); + break; + } + + try { + WriteUnitOfWork wuow(opCtx); + ASSERT_OK(ru1->setTimestamp(Timestamp(time++))); + std::string s = str::stream() + << "abcbcdcdedefefgfghghihijijkjklklmlmnmnomopopqpqrqrsrststutuv" << j; + ASSERT_OK(rs->updateRecord(opCtx, recordId, s.c_str(), s.size() + 1, false, nullptr)); + wuow.commit(); + } catch (const DBException& ex) { + invariant(ex.toStatus().code() == ErrorCodes::WriteConflict); + } + } +} + TEST_F(WiredTigerRecoveryUnitTestFixture, LocalReadOnADocumentBeingPreparedTriggersPrepareConflict) { // Prepare but don't commit a transaction diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp index 3a58b4f91d9..4cd9af2244f 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp @@ -42,7 +42,6 @@ #include "mongo/db/storage/wiredtiger/wiredtiger_session_cache.h" #include "mongo/db/storage/wiredtiger/wiredtiger_util.h" #include "mongo/util/assert_util.h" -#include "mongo/util/scopeguard.h" namespace mongo { @@ -79,6 +78,8 @@ BSONObj WiredTigerServerStatusSection::generateSection(OperationContext* opCtx, WiredTigerKVEngine::appendGlobalStats(bob); + WiredTigerUtil::appendSnapshotWindowSettings(_engine, session, &bob); + return bob.obj(); } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp index bfa26ed8772..7234190b222 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp @@ -39,6 +39,8 @@ #include "mongo/bson/bsonobjbuilder.h" #include "mongo/db/concurrency/write_conflict_exception.h" #include "mongo/db/server_parameters.h" +#include "mongo/db/snapshot_window_options.h" +#include "mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h" #include "mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h" #include "mongo/db/storage/wiredtiger/wiredtiger_session_cache.h" #include "mongo/util/assert_util.h" @@ -591,4 +593,36 @@ Status WiredTigerUtil::exportTableToBSON(WT_SESSION* session, return Status::OK(); } +void WiredTigerUtil::appendSnapshotWindowSettings(WiredTigerKVEngine* engine, + WiredTigerSession* session, + BSONObjBuilder* bob) { + invariant(engine); + invariant(session); + invariant(bob); + + const Timestamp& stableTimestamp = engine->getStableTimestamp(); + const Timestamp& oldestTimestamp = engine->getOldestTimestamp(); + + const unsigned currentAvailableSnapshotWindow = + stableTimestamp.getSecs() - oldestTimestamp.getSecs(); + + int64_t score = uassertStatusOK(WiredTigerUtil::getStatisticsValueAs( + session->getSession(), "statistics:", "", WT_STAT_CONN_CACHE_LOOKASIDE_SCORE)); + + BSONObjBuilder settings(bob->subobjStart("snapshot-window-settings")); + settings.append("cache pressure percentage threshold", + snapshotWindowParams.cachePressureThreshold.load()); + settings.append("current cache pressure percentage", score); + settings.append("max target available snapshots window size in seconds", + snapshotWindowParams.maxTargetSnapshotHistoryWindowInSeconds.load()); + settings.append("target available snapshots window size in seconds", + snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load()); + settings.append("current available snapshots window size in seconds", + currentAvailableSnapshotWindow); + settings.append("latest majority snapshot timestamp available", + stableTimestamp.toStringPretty()); + settings.append("oldest majority snapshot timestamp available", + oldestTimestamp.toStringPretty()); +} + } // namespace mongo diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.h b/src/mongo/db/storage/wiredtiger/wiredtiger_util.h index 5817a5855f6..4b09387a157 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.h @@ -45,6 +45,8 @@ namespace mongo { class BSONObjBuilder; class OperationContext; class WiredTigerConfigParser; +class WiredTigerKVEngine; +class WiredTigerSession; inline bool wt_keeptxnopen() { return false; @@ -115,6 +117,24 @@ public: const std::string& config, BSONObjBuilder* bob); + /** + * Appends information about the storage engine's currently available snapshots and the settings + * that affect that window of maintained history. + * + * "snapshot-window-settings" : { + * "cache pressure percentage threshold" : , + * "current cache pressure percentage" : , + * "max target available snapshots window size in seconds" : , + * "target available snapshots window size in seconds" : , + * "current available snapshots window size in seconds" : , + * "latest majority snapshot timestamp available" : , + * "oldest majority snapshot timestamp available" : + * } + */ + static void appendSnapshotWindowSettings(WiredTigerKVEngine* engine, + WiredTigerSession* session, + BSONObjBuilder* bob); + /** * Gets entire metadata string for collection/index at URI. */ -- cgit v1.2.1