summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorDianna Hohensee <dianna.hohensee@10gen.com>2018-04-12 16:44:36 -0400
committerDianna Hohensee <dianna.hohensee@10gen.com>2018-05-25 09:51:34 -0400
commit136178a12e6b4fe41b6be047ee04db60a69af33e (patch)
tree70164899452b169e2cd77cb3356bdb3a40213ab0 /src/mongo/db
parente573d7f2f908f3fbe96716851cd1b1e3d65fe7c9 (diff)
downloadmongo-136178a12e6b4fe41b6be047ee04db60a69af33e.tar.gz
SERVER-31767 Provide a window of snapshot history that is accessible for PIT reads
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/SConscript69
-rw-r--r--src/mongo/db/db.cpp7
-rw-r--r--src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp2
-rw-r--r--src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.cpp92
-rw-r--r--src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.h45
-rw-r--r--src/mongo/db/repl/storage_interface.h2
-rw-r--r--src/mongo/db/service_context_d_test_fixture.h2
-rw-r--r--src/mongo/db/service_context_devnull_test_fixture.cpp70
-rw-r--r--src/mongo/db/service_context_devnull_test_fixture.h57
-rw-r--r--src/mongo/db/service_entry_point_common.cpp10
-rw-r--r--src/mongo/db/snapshot_window_options.cpp167
-rw-r--r--src/mongo/db/snapshot_window_options.h108
-rw-r--r--src/mongo/db/snapshot_window_util.cpp147
-rw-r--r--src/mongo/db/snapshot_window_util.h77
-rw-r--r--src/mongo/db/snapshot_window_util_test.cpp156
-rw-r--r--src/mongo/db/storage/devnull/SConscript3
-rw-r--r--src/mongo/db/storage/devnull/devnull_kv_engine.cpp11
-rw-r--r--src/mongo/db/storage/devnull/devnull_kv_engine.h9
-rw-r--r--src/mongo/db/storage/kv/kv_engine.h19
-rw-r--r--src/mongo/db/storage/kv/kv_engine_test_harness.h8
-rw-r--r--src/mongo/db/storage/kv/kv_storage_engine.cpp16
-rw-r--r--src/mongo/db/storage/kv/kv_storage_engine.h8
-rw-r--r--src/mongo/db/storage/storage_engine.h31
-rw-r--r--src/mongo/db/storage/test_harness_helper.h11
-rw-r--r--src/mongo/db/storage/wiredtiger/SConscript1
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp114
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h44
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp52
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp3
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp34
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_util.h20
31 files changed, 1354 insertions, 41 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 59d12eb3f23..dbf3eb40306 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -763,6 +763,7 @@ env.Library(
'$BUILD_DIR/mongo/base',
],
LIBDEPS_PRIVATE=[
+ 'snapshot_window_util',
'$BUILD_DIR/mongo/db/auth/auth',
'$BUILD_DIR/mongo/db/auth/authprivilege',
'$BUILD_DIR/mongo/db/command_can_run_here',
@@ -1464,9 +1465,56 @@ env.Library(
source=[
'periodic_runner_job_abort_expired_transactions.cpp',
],
- LIBDEPS=[
- '$BUILD_DIR/mongo/util/periodic_runner',
+ LIBDEPS_PRIVATE=[
'kill_sessions_local',
+ '$BUILD_DIR/mongo/util/periodic_runner',
+ ],
+)
+
+env.Library(
+ target='periodic_runner_job_decrease_snapshot_cache_pressure',
+ source=[
+ 'periodic_runner_job_decrease_snapshot_cache_pressure.cpp',
+ ],
+ LIBDEPS_PRIVATE=[
+ 'snapshot_window_options',
+ 'snapshot_window_util',
+ '$BUILD_DIR/mongo/db/service_context',
+ '$BUILD_DIR/mongo/util/periodic_runner',
+ ],
+)
+
+env.Library(
+ target='snapshot_window_options',
+ source=[
+ 'snapshot_window_options.cpp',
+ ],
+ LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/server_parameters',
+ ],
+)
+
+env.Library(
+ target='snapshot_window_util',
+ source=[
+ 'snapshot_window_util.cpp',
+ ],
+ LIBDEPS_PRIVATE=[
+ 'snapshot_window_options',
+ '$BUILD_DIR/mongo/db/service_context',
+ ],
+)
+
+env.CppUnitTest(
+ target='snapshot_window_util_test',
+ source=[
+ 'snapshot_window_util_test.cpp',
+ ],
+ LIBDEPS_PRIVATE=[
+ 'service_context_d',
+ 'service_context_devnull_test_fixture',
+ 'snapshot_window_options',
+ 'snapshot_window_util',
],
)
@@ -1695,6 +1743,23 @@ env.Library(
)
env.Library(
+ target= 'service_context_devnull_test_fixture',
+ source= [
+ 'service_context_devnull_test_fixture.cpp',
+ ],
+ LIBDEPS=[
+ # this library is required only because of SERVER-29908
+ '$BUILD_DIR/mongo/db/s/sharding_runtime_d',
+ ],
+ LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/auth/authmocks',
+ '$BUILD_DIR/mongo/db/storage/devnull/storage_devnull',
+ '$BUILD_DIR/mongo/unittest/unittest',
+ 'service_context_d',
+ ],
+)
+
+env.Library(
target='log_process_details',
source=[
'log_process_details.cpp',
diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp
index 5ddf80abdd9..6cb7c322667 100644
--- a/src/mongo/db/db.cpp
+++ b/src/mongo/db/db.cpp
@@ -95,6 +95,7 @@
#include "mongo/db/op_observer_registry.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/periodic_runner_job_abort_expired_transactions.h"
+#include "mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.h"
#include "mongo/db/query/internal_plans.h"
#include "mongo/db/repair_database_and_check_version.h"
#include "mongo/db/repl/drop_pending_collection_reaper.h"
@@ -601,13 +602,17 @@ ExitCode _initAndListen(int listenPort) {
SessionKiller::set(serviceContext,
std::make_shared<SessionKiller>(serviceContext, killSessionsLocal));
- // Start up a background task to periodically check for and kill expired transactions.
+ // Start up a background task to periodically check for and kill expired transactions; and a
+ // background task to periodically check for and decrease cache pressure by decreasing the
+ // target size setting for the storage engine's window of available snapshots.
+ //
// Only do this on storage engines supporting snapshot reads, which hold resources we wish to
// release periodically in order to avoid storage cache pressure build up.
auto storageEngine = serviceContext->getStorageEngine();
invariant(storageEngine);
if (storageEngine->supportsReadConcernSnapshot()) {
startPeriodicThreadToAbortExpiredTransactions(serviceContext);
+ startPeriodicThreadToDecreaseSnapshotHistoryCachePressure(serviceContext);
}
// Set up the logical session cache
diff --git a/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp b/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp
index 84959e6fd3c..196a0f1dca9 100644
--- a/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp
+++ b/src/mongo/db/periodic_runner_job_abort_expired_transactions.cpp
@@ -70,7 +70,7 @@ void startPeriodicThreadToAbortExpiredTransactions(ServiceContext* serviceContex
period = (period < 1) ? 1 : period;
period = (period > 60) ? 60 : period;
- if (++seconds < period) {
+ if (++seconds <= period) {
return;
}
diff --git a/src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.cpp b/src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.cpp
new file mode 100644
index 00000000000..351302a3d4c
--- /dev/null
+++ b/src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.cpp
@@ -0,0 +1,92 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.h"
+
+#include "mongo/db/client.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/db/service_context.h"
+#include "mongo/db/snapshot_window_options.h"
+#include "mongo/db/snapshot_window_util.h"
+#include "mongo/util/log.h"
+#include "mongo/util/periodic_runner.h"
+
+namespace mongo {
+
+void startPeriodicThreadToDecreaseSnapshotHistoryCachePressure(ServiceContext* serviceContext) {
+ // Enforce calling this function once, and only once.
+ static bool firstCall = true;
+ invariant(firstCall);
+ firstCall = false;
+
+ auto periodicRunner = serviceContext->getPeriodicRunner();
+ invariant(periodicRunner);
+
+ // PeriodicRunner does not currently support altering the period of a job. So we are giving this
+ // job a 1 second period on PeriodicRunner and incrementing a static variable 'seconds' on each
+ // run until we reach checkCachePressurePeriodSeconds, at which point we run the code and reset
+ // 'seconds'. Etc.
+ PeriodicRunner::PeriodicJob job(
+ "startPeriodicThreadToDecreaseSnapshotHistoryCachePressure",
+ [](Client* client) {
+ try {
+ static int seconds = 0;
+ int checkPressurePeriod =
+ snapshotWindowParams.checkCachePressurePeriodSeconds.load();
+
+ invariant(checkPressurePeriod >= 1);
+
+ if (++seconds <= checkPressurePeriod) {
+ return;
+ }
+
+ seconds = 0;
+
+ // The opCtx destructor handles unsetting itself from the Client.
+ // (The PeriodicRunnerASIO's Client must be reset before returning.)
+ auto opCtx = client->makeOperationContext();
+
+ SnapshotWindowUtil::decreaseTargetSnapshotWindowSize(opCtx.get());
+ } catch (const DBException& ex) {
+ if (!ErrorCodes::isShutdownError(ex.toStatus().code())) {
+ warning() << "Periodic task to check for and decrease cache pressure caused by "
+ "maintaining too much snapshot history failed! Caused by: "
+ << ex.toStatus();
+ }
+ }
+ },
+ Seconds(1));
+
+ periodicRunner->scheduleJob(std::move(job));
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.h b/src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.h
new file mode 100644
index 00000000000..3ff0be0dae9
--- /dev/null
+++ b/src/mongo/db/periodic_runner_job_decrease_snapshot_cache_pressure.h
@@ -0,0 +1,45 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#pragma once
+
+namespace mongo {
+
+class ServiceContext;
+
+/**
+ * Periodically checks for storage engine cache pressure to determine whether the maintained
+ * snapshot history window target setting should be decreased. Maintaining too much snapshot and
+ * write history can slow down the system. Runs once every checkCachePressurePeriodSeconds.
+ *
+ * This function should only ever be called once, during mongod server startup (db.cpp).
+ * The PeriodicRunner will handle shutting down the job on shutdown, no extra handling necessary.
+ */
+void startPeriodicThreadToDecreaseSnapshotHistoryCachePressure(ServiceContext* serviceContext);
+
+} // namespace mongo
diff --git a/src/mongo/db/repl/storage_interface.h b/src/mongo/db/repl/storage_interface.h
index 5b526456752..ca3ce588fa7 100644
--- a/src/mongo/db/repl/storage_interface.h
+++ b/src/mongo/db/repl/storage_interface.h
@@ -59,7 +59,7 @@ struct TimestampedBSONObj {
/**
* Storage interface used by the replication system to interact with storage.
- * This interface provides seperation of concerns and a place for mocking out test
+ * This interface provides separation of concerns and a place for mocking out test
* interactions.
*
* The grouping of functionality includes general collection helpers, and more specific replication
diff --git a/src/mongo/db/service_context_d_test_fixture.h b/src/mongo/db/service_context_d_test_fixture.h
index 35932b64814..bd56f4f2ae6 100644
--- a/src/mongo/db/service_context_d_test_fixture.h
+++ b/src/mongo/db/service_context_d_test_fixture.h
@@ -35,7 +35,7 @@
namespace mongo {
/**
- * Test fixture class for tests that use either the "ephemeralForTest" or "devnull" storage engines.
+ * Test fixture class for tests that use the "ephemeralForTest" storage engine.
*/
class ServiceContextMongoDTest : public unittest::Test {
public:
diff --git a/src/mongo/db/service_context_devnull_test_fixture.cpp b/src/mongo/db/service_context_devnull_test_fixture.cpp
new file mode 100644
index 00000000000..cefea8a0053
--- /dev/null
+++ b/src/mongo/db/service_context_devnull_test_fixture.cpp
@@ -0,0 +1,70 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/service_context_devnull_test_fixture.h"
+
+#include "mongo/db/client.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/db/service_context.h"
+#include "mongo/db/service_context_d.h"
+#include "mongo/db/storage/storage_engine_init.h"
+#include "mongo/stdx/memory.h"
+#include "mongo/unittest/temp_dir.h"
+#include "mongo/util/assert_util.h"
+
+namespace mongo {
+
+void ServiceContextDevnullTestFixture::setUp() {
+ Test::setUp();
+ Client::initThread(getThreadName());
+
+ auto const serviceContext = getServiceContext();
+ if (!serviceContext->getStorageEngine()) {
+ // When using the 'devnull' storage engine, it is fine for the temporary directory to
+ // go away after the global storage engine is initialized.
+ unittest::TempDir tempDir("service_context_devnull_test");
+ mongo::storageGlobalParams.dbpath = tempDir.path();
+ mongo::storageGlobalParams.engine = "devnull";
+ mongo::storageGlobalParams.engineSetByUser = true;
+ createLockFile(serviceContext);
+ initializeStorageEngine(serviceContext);
+ }
+}
+
+void ServiceContextDevnullTestFixture::tearDown() {
+ Client::destroy();
+ Test::tearDown();
+}
+
+ServiceContext* ServiceContextDevnullTestFixture::getServiceContext() {
+ return getGlobalServiceContext();
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/service_context_devnull_test_fixture.h b/src/mongo/db/service_context_devnull_test_fixture.h
new file mode 100644
index 00000000000..77a0d551965
--- /dev/null
+++ b/src/mongo/db/service_context_devnull_test_fixture.h
@@ -0,0 +1,57 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+
+class ServiceContext;
+
+/**
+ * Test fixture class for mongod tests that use the "devnull" storage engine.
+ */
+class ServiceContextDevnullTestFixture : public unittest::Test {
+public:
+ /**
+ * Initializes the devnull engine as the global storage engine.
+ * Also sets up a Client on the thread, which can be accessed via 'cc()'.
+ */
+ void setUp() override;
+
+ void tearDown() override;
+
+ /**
+ * Returns a service context, which is only valid for this instance of the test.
+ * Must not be called before setUp() or after tearDown().
+ */
+ ServiceContext* getServiceContext();
+};
+
+} // namespace mongo
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index f6cabff5bb1..116a5098a7d 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -70,6 +70,7 @@
#include "mongo/db/s/sharding_state.h"
#include "mongo/db/service_entry_point_common.h"
#include "mongo/db/session_catalog.h"
+#include "mongo/db/snapshot_window_util.h"
#include "mongo/db/stats/counters.h"
#include "mongo/db/stats/top.h"
#include "mongo/rpc/factory.h"
@@ -880,6 +881,15 @@ void execCommandDatabase(OperationContext* opCtx,
onCannotImplicitlyCreateCollection(opCtx, cannotImplicitCreateCollInfo->getNss())
.ignore();
}
+ } else if (e.code() == ErrorCodes::SnapshotTooOld) {
+ // SnapshotTooOld errors indicate that PIT ops are failing to find an available snapshot
+ // at their specified atClusterTime. Therefore, we'll try to increase the snapshot
+ // history window that the storage engine maintains in order to increase the likelihood
+ // of successful future PIT atClusterTime requests.
+ auto engine = opCtx->getServiceContext()->getStorageEngine();
+ if (engine && engine->supportsReadConcernSnapshot()) {
+ SnapshotWindowUtil::increaseTargetSnapshotWindowSize(opCtx);
+ }
}
// Append the error labels for transient transaction errors.
diff --git a/src/mongo/db/snapshot_window_options.cpp b/src/mongo/db/snapshot_window_options.cpp
new file mode 100644
index 00000000000..3b19de96fb5
--- /dev/null
+++ b/src/mongo/db/snapshot_window_options.cpp
@@ -0,0 +1,167 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/snapshot_window_options.h"
+
+#include "mongo/db/server_parameters.h"
+#include "mongo/platform/compiler.h"
+
+namespace mongo {
+
+SnapshotWindowParams snapshotWindowParams;
+
+/**
+ * Provides validation for snapshot window server parameter settings.
+ */
+
+MONGO_COMPILER_VARIABLE_UNUSED auto _exportedMaxTargetSnapshotHistoryWindowInSeconds =
+ (new ExportedServerParameter<int32_t, ServerParameterType::kStartupAndRuntime>(
+ ServerParameterSet::getGlobal(),
+ "maxTargetSnapshotHistoryWindowInSeconds",
+ &snapshotWindowParams.maxTargetSnapshotHistoryWindowInSeconds))
+ -> withValidator([](const int32_t& potentialNewValue) {
+ if (potentialNewValue < 0) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "maxTargetSnapshotHistoryWindowInSeconds must be "
+ "greater than or equal to 0. '"
+ << potentialNewValue
+ << "' is an invalid setting.");
+ }
+ return Status::OK();
+ });
+
+MONGO_COMPILER_VARIABLE_UNUSED auto _exportedCachePressureThreshold =
+ (new ExportedServerParameter<int32_t, ServerParameterType::kStartupAndRuntime>(
+ ServerParameterSet::getGlobal(),
+ "cachePressureThreshold",
+ &snapshotWindowParams.cachePressureThreshold))
+ -> withValidator([](const int32_t& potentialNewValue) {
+ if (potentialNewValue < 0 || potentialNewValue > 100) {
+ return Status(ErrorCodes::BadValue,
+ str::stream() << "cachePressureThreshold must be greater than or "
+ "equal to 0 and less than or equal to 100. '"
+ << potentialNewValue
+ << "' is an invalid setting.");
+ }
+ return Status::OK();
+ });
+
+MONGO_COMPILER_VARIABLE_UNUSED auto _exportedSnapshotWindowMultiplicativeDecrease =
+ (new ExportedServerParameter<double, ServerParameterType::kStartupAndRuntime>(
+ ServerParameterSet::getGlobal(),
+ "snapshotWindowMultiplicativeDecrease",
+ &snapshotWindowParams.snapshotWindowMultiplicativeDecrease))
+ -> withValidator([](const double& potentialNewValue) {
+ if (potentialNewValue <= 0 || potentialNewValue >= 1) {
+ return Status(ErrorCodes::BadValue,
+ str::stream()
+ << "snapshotWindowMultiplicativeDecrease must be greater "
+ "than 0 and less than 1. '"
+ << potentialNewValue
+ << "' is an invalid setting.");
+ }
+
+ return Status::OK();
+ });
+
+MONGO_COMPILER_VARIABLE_UNUSED auto _exportedSnapshotWindowAdditiveIncreaseSeconds =
+ (new ExportedServerParameter<std::int32_t, ServerParameterType::kStartupAndRuntime>(
+ ServerParameterSet::getGlobal(),
+ "snapshotWindowAdditiveIncreaseSeconds",
+ &snapshotWindowParams
+ .snapshotWindowAdditiveIncreaseSeconds)) -> withValidator([](const int32_t&
+ potentialNewValue) {
+ if (potentialNewValue < 1) {
+ return Status(
+ ErrorCodes::BadValue,
+ str::stream()
+ << "snapshotWindowAdditiveIncreaseSeconds must be greater than or equal to 1. '"
+ << potentialNewValue
+ << "' is an invalid setting.");
+ }
+
+ return Status::OK();
+ });
+
+MONGO_COMPILER_VARIABLE_UNUSED auto _exportedMinMillisBetweenSnapshotWindowInc =
+ (new ExportedServerParameter<std::int32_t, ServerParameterType::kStartupAndRuntime>(
+ ServerParameterSet::getGlobal(),
+ "minMillisBetweenSnapshotWindowInc",
+ &snapshotWindowParams.minMillisBetweenSnapshotWindowInc))
+ -> withValidator([](const int32_t& potentialNewValue) {
+ if (potentialNewValue < 1) {
+ return Status(
+ ErrorCodes::BadValue,
+ str::stream()
+ << "minMillisBetweenSnapshotWindowInc must be greater than or equal to 1. '"
+ << potentialNewValue
+ << "' is an invalid setting.");
+ }
+
+ return Status::OK();
+ });
+
+MONGO_COMPILER_VARIABLE_UNUSED auto _exportedMinMillisBetweenSnapshotWindowDec =
+ (new ExportedServerParameter<std::int32_t, ServerParameterType::kStartupAndRuntime>(
+ ServerParameterSet::getGlobal(),
+ "minMillisBetweenSnapshotWindowDec",
+ &snapshotWindowParams.minMillisBetweenSnapshotWindowDec))
+ -> withValidator([](const int32_t& potentialNewValue) {
+ if (potentialNewValue < 1) {
+ return Status(
+ ErrorCodes::BadValue,
+ str::stream()
+ << "minMillisBetweenSnapshotWindowDec must be greater than or equal to 1. '"
+ << potentialNewValue
+ << "' is an invalid setting.");
+ }
+
+ return Status::OK();
+ });
+
+MONGO_COMPILER_VARIABLE_UNUSED auto _exportedCheckCachePressurePeriodSeconds =
+ (new ExportedServerParameter<std::int32_t, ServerParameterType::kStartupAndRuntime>(
+ ServerParameterSet::getGlobal(),
+ "checkCachePressurePeriodSeconds",
+ &snapshotWindowParams.checkCachePressurePeriodSeconds))
+ -> withValidator([](const int32_t& potentialNewValue) {
+ if (potentialNewValue < 1) {
+ return Status(
+ ErrorCodes::BadValue,
+ str::stream()
+ << "checkCachePressurePeriodSeconds must be greater than or equal to 1. '"
+ << potentialNewValue
+ << "' is an invalid setting.");
+ }
+
+ return Status::OK();
+ });
+
+} // namespace mongo
diff --git a/src/mongo/db/snapshot_window_options.h b/src/mongo/db/snapshot_window_options.h
new file mode 100644
index 00000000000..992ff718212
--- /dev/null
+++ b/src/mongo/db/snapshot_window_options.h
@@ -0,0 +1,108 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/platform/atomic_proxy.h"
+#include "mongo/platform/atomic_word.h"
+
+namespace mongo {
+
+/**
+ * This is a collection of parameters that affect how much snapshot history the storage engine will
+ * maintain to support point-in-time transactions (read or write). This is referred to as the
+ * snapshot window.
+ */
+struct SnapshotWindowParams {
+
+ // maxTargetSnapshotHistoryWindowInSeconds (startup & runtime server paramter, range 0+).
+ //
+ // Dictates the maximum lag in seconds oldest_timestamp should be behind stable_timestamp.
+ // targetSnapshotHistoryWindowInSeconds below is the actual active lag setting target.
+ //
+ // Note that the window size can become greater than this if an ongoing operation is holding an
+ // older snapshot open.
+ AtomicInt32 maxTargetSnapshotHistoryWindowInSeconds{100};
+
+ // targetSnapshotHistoryWindowInSeconds (not a server parameter, range 0+).
+ //
+ // Dictates the target lag in seconds oldest_timestamp should be set behind stable_timestamp.
+ // Should only be set in the range [0, maxTargetSnapshotHistoryWindowInSeconds].
+ //
+ // Note that this is the history window we attempt to maintain, but our current system state may
+ // not always reflect it: the window can only change as more writes come in, so it can take time
+ // for the actual window size to catch up with a change. This value guides actions whenever the
+ // system goes to update the oldest_timestamp value.
+ AtomicInt32 targetSnapshotHistoryWindowInSeconds{
+ maxTargetSnapshotHistoryWindowInSeconds.load()};
+
+ // cachePressureThreshold (startup & runtime server paramter, range [0, 100]).
+ //
+ // Dictates what percentage of cache in use is considered too high. This setting helps preempt
+ // storage cache pressure immobilizing the system. Attempts to increase
+ // targetSnapshotHistoryWindowInSeconds will be ignored when the cache pressure reaches this
+ // threshold. Additionally, a periodic task will decrease targetSnapshotHistoryWindowInSeconds
+ // when cache pressure exceeds the threshold.
+ AtomicInt32 cachePressureThreshold{50};
+
+ // snapshotWindowMultiplicativeDecrease (startup & runtime server paramter, range (0,1)).
+ //
+ // Controls by what multiplier the target snapshot history window setting is decreased when
+ // cache pressure becomes too high, per the cachePressureThreshold setting.
+ AtomicDouble snapshotWindowMultiplicativeDecrease{0.75};
+
+ // snapshotWindowAdditiveIncreaseSeconds (startup & runtime server paramter, range 1+).
+ //
+ // Controls by how much the target snapshot history window setting is increased when cache
+ // pressure is OK, per cachePressureThreshold, and we need to service older snapshots for global
+ // point-in-time reads.
+ AtomicInt32 snapshotWindowAdditiveIncreaseSeconds{2};
+
+ // minMillisBetweenSnapshotWindowInc (startup & runtime server paramter, range 0+).
+ // minMillisBetweenSnapshotWindowDec (startup & runtime server paramter, range 0+).
+ //
+ // Controls how often attempting to increase/decrease the target snapshot window will have an
+ // effect. Multiple callers within minMillisBetweenSnapshotWindowInc will have the same effect
+ // as one. This protects the system because it takes time for the target snapshot window to
+ // affect the actual storage engine snapshot window. The stable timestamp must move forward for
+ // the window between it and oldest timestamp to grow or shrink.
+ AtomicInt32 minMillisBetweenSnapshotWindowInc{500};
+ AtomicInt32 minMillisBetweenSnapshotWindowDec{500};
+
+ // checkCachePressurePeriodSeconds (startup & runtime server paramter, range 1+)
+ //
+ // Controls the period of the task that checks for cache pressure and decreases
+ // targetSnapshotHistoryWindowInSeconds if the pressure is above cachePressureThreshold. The
+ // target window size setting must not be decreased too fast because time must be allowed for
+ // the storage engine to attempt to act on the new setting.
+ AtomicInt32 checkCachePressurePeriodSeconds{5};
+};
+
+extern SnapshotWindowParams snapshotWindowParams;
+
+} // namespace mongo
diff --git a/src/mongo/db/snapshot_window_util.cpp b/src/mongo/db/snapshot_window_util.cpp
new file mode 100644
index 00000000000..d364187118a
--- /dev/null
+++ b/src/mongo/db/snapshot_window_util.cpp
@@ -0,0 +1,147 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kStorage
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/snapshot_window_util.h"
+
+#include "mongo/db/operation_context.h"
+#include "mongo/db/service_context.h"
+#include "mongo/db/snapshot_window_options.h"
+#include "mongo/db/storage/storage_engine.h"
+#include "mongo/stdx/mutex.h"
+#include "mongo/util/concurrency/with_lock.h"
+#include "mongo/util/log.h"
+
+namespace mongo {
+
+namespace SnapshotWindowUtil {
+
+// Adds concurrency control to increaseTargetSnapshotWindowSize() and
+// decreaseTargetSnapshotWindowSize(). They should not run concurrently with themselves or one
+// another, since they act on and modify the same storage parameters. Further guards the static
+// variables "_snapshotWindowLastDecreasedAt" and "_snapshotWindowLastIncreasedAt" used in
+// increaseTargetSnapshotWindowSize() and decreaseSnapshowWindow().
+stdx::mutex snapshotWindowMutex;
+
+namespace {
+
+void _decreaseTargetSnapshotWindowSize(WithLock lock, OperationContext* opCtx) {
+ // Tracks the last time that the snapshot window was decreased so that it does not go down so
+ // fast that the system does not have time to react and reduce snapshot availability.
+ static Date_t _snapshotWindowLastDecreasedAt{Date_t::min()};
+
+ if (_snapshotWindowLastDecreasedAt >
+ (Date_t::now() -
+ Milliseconds(snapshotWindowParams.minMillisBetweenSnapshotWindowDec.load()))) {
+ // We have already decreased the window size in the last minMillisBetweenSnapshotWindowDec
+ // milliseconds.
+ return;
+ }
+
+ snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.store(
+ snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load() *
+ snapshotWindowParams.snapshotWindowMultiplicativeDecrease.load());
+
+ // Try to set the oldest_timestamp immediately without waiting for a write to adjust the
+ // window. May or may not work depending on the state of the system.
+ StorageEngine* engine = opCtx->getServiceContext()->getStorageEngine();
+ invariant(engine);
+ engine->setOldestTimestampFromStable();
+
+ _snapshotWindowLastDecreasedAt = Date_t::now();
+}
+
+} // namespace
+
+void increaseTargetSnapshotWindowSize(OperationContext* opCtx) {
+ stdx::unique_lock<stdx::mutex> lock(snapshotWindowMutex);
+
+ // Tracks the last time that the snapshot window was increased so that it does not go up so fast
+ // that the storage engine does not have time to improve snapshot availability.
+ static Date_t _snapshotWindowLastIncreasedAt{Date_t::min()};
+
+ if (_snapshotWindowLastIncreasedAt >
+ (Date_t::now() -
+ Milliseconds(snapshotWindowParams.minMillisBetweenSnapshotWindowInc.load()))) {
+ // We have already increased the window size in the last minMillisBetweenSnapshotWindowInc
+ // milliseconds.
+ return;
+ }
+
+ // If the cache pressure is already too high, we will not put more pressure on it by increasing
+ // the window size.
+ StorageEngine* engine = opCtx->getServiceContext()->getStorageEngine();
+ if (engine && engine->isCacheUnderPressure(opCtx)) {
+ warning() << "Attempted to increase the time window of available snapshots for "
+ "point-in-time operations (readConcern level 'snapshot' or transactions), but "
+ "the storage engine cache pressure, per the cachePressureThreshold setting of "
+ "'"
+ << snapshotWindowParams.cachePressureThreshold.load()
+ << "', is too high to allow it to increase. If this happens frequently, consider "
+ "either increasing the cache pressure threshold or increasing the memory "
+ "available to the storage engine cache, in order to improve the success rate "
+ "or speed of point-in-time requests.";
+ _decreaseTargetSnapshotWindowSize(lock, opCtx);
+ return;
+ }
+
+ if (snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load() ==
+ snapshotWindowParams.maxTargetSnapshotHistoryWindowInSeconds.load()) {
+ warning() << "Attempted to increase the time window of available snapshots for "
+ "point-in-time operations (readConcern level 'snapshot' or transactions), but "
+ "maxTargetSnapshotHistoryWindowInSeconds has already been reached. If this "
+ "happens frequently, consider increasing the "
+ "maxTargetSnapshotHistoryWindowInSeconds setting value, which is currently "
+ "set to '"
+ << snapshotWindowParams.maxTargetSnapshotHistoryWindowInSeconds.load() << "'.";
+ return;
+ }
+
+ int increasedSnapshotWindow = snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load() +
+ snapshotWindowParams.snapshotWindowAdditiveIncreaseSeconds.load();
+ snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.store(
+ std::min(increasedSnapshotWindow,
+ snapshotWindowParams.maxTargetSnapshotHistoryWindowInSeconds.load()));
+
+ _snapshotWindowLastIncreasedAt = Date_t::now();
+}
+
+void decreaseTargetSnapshotWindowSize(OperationContext* opCtx) {
+ stdx::unique_lock<stdx::mutex> lock(snapshotWindowMutex);
+
+ StorageEngine* engine = opCtx->getServiceContext()->getStorageEngine();
+ if (engine && engine->isCacheUnderPressure(opCtx)) {
+ _decreaseTargetSnapshotWindowSize(lock, opCtx);
+ }
+}
+
+} // namespace SnapshotWindowUtil
+} // namespace mongo
diff --git a/src/mongo/db/snapshot_window_util.h b/src/mongo/db/snapshot_window_util.h
new file mode 100644
index 00000000000..f78c5005460
--- /dev/null
+++ b/src/mongo/db/snapshot_window_util.h
@@ -0,0 +1,77 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#pragma once
+
+namespace mongo {
+
+class OperationContext;
+
+/**
+ * Function helpers to manipulate parameters affecting the snapshot time window size that the
+ * storage engine maintains of available snapshots for point-in-time reads.
+ */
+namespace SnapshotWindowUtil {
+
+/**
+ * Attempts to increase the setting that controls the window of time between stable_timestamp and
+ * oldest_timestamp, in order to provide a greater range of available snapshots for point-in-time
+ * operations. The window will not be increased, however, if the cache pressure is currently too
+ * high. This function will be called when server requests return SnapshotTooOld (or similar)
+ * errors. Note that this will not immediately affect the oldest_timestamp. Rather, it affects
+ * actions taken next time oldest_timestamp is updated, usually when the stable timestamp is
+ * advanced.
+ *
+ * Implements an additive increase algorithm.
+ *
+ * Calling many times all at once has the same effect as calling once. The last update time is
+ * tracked and attempts to increase the window are limited to once in
+ * minMillisBetweenSnapshotWindowInc. This is to protect against a sudden wave of function calls due
+ * to simultaneous SnapshotTooOld errors. Some time must be allowed for the increased target
+ * snapshot window size to have an effect. The target size can also never exceed
+ * maxTargetSnapshotHistoryWindowInSeconds.
+ */
+void increaseTargetSnapshotWindowSize(OperationContext* opCtx);
+
+/**
+ * Attempts to decrease (if not already zero) the setting that affects the size of the window of
+ * time between stable_timestamp and oldest_timestamp in order to reduce storage engine cache
+ * pressure. The window target will not be decreased, however, if the cache is not currently under
+ * pressure. Pressure can occur when too much history is being maintained for point-in-time
+ * snapshots. Note that this will not necessarily immediately affect the actual window size; rather,
+ * it affects actions taken whenever oldest_timestamp is updated, usually when the stable timestamp
+ * is advanced.
+ *
+ * This will make one attempt to immediately adjust the window size if possible.
+ *
+ * Implements a multiplicative decrease algorithm.
+ */
+void decreaseTargetSnapshotWindowSize(OperationContext* opCtx);
+
+} // namespace SnapshotWindowUtil
+} // namespace mongo
diff --git a/src/mongo/db/snapshot_window_util_test.cpp b/src/mongo/db/snapshot_window_util_test.cpp
new file mode 100644
index 00000000000..d668250f5d9
--- /dev/null
+++ b/src/mongo/db/snapshot_window_util_test.cpp
@@ -0,0 +1,156 @@
+/**
+ * Copyright (C) 2018 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/snapshot_window_util.h"
+
+#include "mongo/db/client.h"
+#include "mongo/db/service_context.h"
+#include "mongo/db/service_context_devnull_test_fixture.h"
+#include "mongo/db/snapshot_window_options.h"
+#include "mongo/db/storage/storage_options.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+namespace {
+
+using namespace SnapshotWindowUtil;
+
+/**
+ * Tests the functions in snapshot_window_util.h using a devnull storage engine.
+ */
+class SnapshotWindowTest : public ServiceContextDevnullTestFixture {
+public:
+ void setUp() override {
+ ServiceContextDevnullTestFixture::setUp();
+ _opCtx = cc().makeOperationContext();
+ }
+
+ void tearDown() override {
+ _opCtx.reset();
+ ServiceContextDevnullTestFixture::tearDown();
+ }
+
+ ServiceContext::UniqueOperationContext _opCtx;
+};
+
+TEST_F(SnapshotWindowTest, DecreaseAndIncreaseSnapshotWindow) {
+ auto engine = getServiceContext()->getStorageEngine();
+ invariant(engine);
+
+ // Lower the time enforced between function calls to speed up testing.
+ // Dec must match Inc b/c increaseTargetWindowSize can call into decreaseTargetWindowSize.
+ snapshotWindowParams.minMillisBetweenSnapshotWindowInc.store(100);
+ snapshotWindowParams.minMillisBetweenSnapshotWindowDec.store(100);
+
+ auto maxTargetSnapshotWindowSeconds =
+ snapshotWindowParams.maxTargetSnapshotHistoryWindowInSeconds.load();
+ auto snapshotWindowSeconds = snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load();
+ ASSERT_EQ(maxTargetSnapshotWindowSeconds, snapshotWindowSeconds);
+
+ auto windowMultiplicativeDecrease =
+ snapshotWindowParams.snapshotWindowMultiplicativeDecrease.load();
+ auto windowAdditiveIncrease = snapshotWindowParams.snapshotWindowAdditiveIncreaseSeconds.load();
+
+ auto cachePressureThreshold = snapshotWindowParams.cachePressureThreshold.load();
+ auto minTimeBetweenInc = snapshotWindowParams.minMillisBetweenSnapshotWindowInc.load();
+
+ /**
+ * Test that decreasing the size succeeds when cache pressure is ABOVE the threshold
+ */
+
+ engine->setCachePressureForTest(cachePressureThreshold + 5);
+
+ decreaseTargetSnapshotWindowSize(_opCtx.get());
+ auto snapshotWindowSecondsOne =
+ snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load();
+
+ ASSERT_GT(snapshotWindowSeconds, snapshotWindowSecondsOne);
+ ASSERT_EQ(snapshotWindowSecondsOne,
+ static_cast<int>(snapshotWindowSeconds * windowMultiplicativeDecrease));
+
+ /**
+ * Test that increasing the size SUCCEEDS when the cache pressure is BELOW the threshold.
+ */
+
+ engine->setCachePressureForTest(cachePressureThreshold - 5);
+
+ increaseTargetSnapshotWindowSize(_opCtx.get());
+ auto snapshotWindowSecondsTwo =
+ snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load();
+
+ ASSERT_EQ(snapshotWindowSecondsTwo, snapshotWindowSecondsOne + windowAdditiveIncrease);
+
+ /**
+ * Test that increasing the size FAILS when the cache pressure is ABOVE the threshold, and
+ * instead this causes the size to be decreased.
+ */
+
+ engine->setCachePressureForTest(cachePressureThreshold + 5);
+
+ // Sleep for a time because increaseTargetSnapshotWindowSize() enforces a wait time between
+ // updates.
+ sleepmillis(2 * minTimeBetweenInc);
+
+ increaseTargetSnapshotWindowSize(_opCtx.get());
+ auto snapshotWindowSecondsThree =
+ snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load();
+
+ ASSERT_EQ(snapshotWindowSecondsThree,
+ static_cast<int>(snapshotWindowSecondsTwo * windowMultiplicativeDecrease));
+
+ engine->setCachePressureForTest(cachePressureThreshold - 5);
+
+ /**
+ * Test that the size cannot be increased above the maximum size.
+ */
+
+ // Integers round down, so add 1 to make sure it reaches the max.
+ int numIncreasesToReachMax =
+ (maxTargetSnapshotWindowSeconds - snapshotWindowSecondsThree) / windowAdditiveIncrease + 1;
+ for (int i = 0; i < numIncreasesToReachMax; ++i) {
+ sleepmillis(2 * minTimeBetweenInc);
+ increaseTargetSnapshotWindowSize(_opCtx.get());
+ }
+
+ // Should be at max.
+ auto snapshotWindowSecondsFour =
+ snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load();
+ ASSERT_EQ(snapshotWindowSecondsFour, maxTargetSnapshotWindowSeconds);
+
+ // An attempt to increase beyond max should have no effect.
+ sleepmillis(2 * minTimeBetweenInc);
+ increaseTargetSnapshotWindowSize(_opCtx.get());
+ auto snapshotWindowSecondsFive =
+ snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load();
+ ASSERT_EQ(snapshotWindowSecondsFive, maxTargetSnapshotWindowSeconds);
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/storage/devnull/SConscript b/src/mongo/db/storage/devnull/SConscript
index 1d960936a60..fa34cf41f83 100644
--- a/src/mongo/db/storage/devnull/SConscript
+++ b/src/mongo/db/storage/devnull/SConscript
@@ -13,6 +13,9 @@ env.Library(
'$BUILD_DIR/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store',
'$BUILD_DIR/mongo/db/storage/kv/kv_prefix',
],
+ LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/snapshot_window_options',
+ ],
)
env.Library(
diff --git a/src/mongo/db/storage/devnull/devnull_kv_engine.cpp b/src/mongo/db/storage/devnull/devnull_kv_engine.cpp
index 8e158dbc3ea..0fdb22b1d12 100644
--- a/src/mongo/db/storage/devnull/devnull_kv_engine.cpp
+++ b/src/mongo/db/storage/devnull/devnull_kv_engine.cpp
@@ -31,6 +31,7 @@
#include "mongo/db/storage/devnull/devnull_kv_engine.h"
#include "mongo/base/disallow_copying.h"
+#include "mongo/db/snapshot_window_options.h"
#include "mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.h"
#include "mongo/db/storage/record_store.h"
#include "mongo/db/storage/sorted_data_interface.h"
@@ -258,4 +259,14 @@ SortedDataInterface* DevNullKVEngine::getSortedDataInterface(OperationContext* o
const IndexDescriptor* desc) {
return new DevNullSortedDataInterface();
}
+
+bool DevNullKVEngine::isCacheUnderPressure(OperationContext* opCtx) const {
+ return (_cachePressureForTest >= snapshotWindowParams.cachePressureThreshold.load());
+}
+
+void DevNullKVEngine::setCachePressureForTest(int pressure) {
+ invariant(pressure >= 0 && pressure <= 100);
+ _cachePressureForTest = pressure;
}
+
+} // namespace mongo
diff --git a/src/mongo/db/storage/devnull/devnull_kv_engine.h b/src/mongo/db/storage/devnull/devnull_kv_engine.h
index 5e8af42c74d..df356cfaae1 100644
--- a/src/mongo/db/storage/devnull/devnull_kv_engine.h
+++ b/src/mongo/db/storage/devnull/devnull_kv_engine.h
@@ -39,6 +39,9 @@ namespace mongo {
class JournalListener;
+/**
+ * The devnull storage engine is intended for unit and performance testing.
+ */
class DevNullKVEngine : public KVEngine {
public:
virtual ~DevNullKVEngine() {}
@@ -92,6 +95,10 @@ public:
return true;
}
+ virtual bool isCacheUnderPressure(OperationContext* opCtx) const override;
+
+ virtual void setCachePressureForTest(int pressure) override;
+
virtual int64_t getIdentSize(OperationContext* opCtx, StringData ident) {
return 1;
}
@@ -118,5 +125,7 @@ public:
private:
std::shared_ptr<void> _catalogInfo;
+
+ int _cachePressureForTest;
};
}
diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h
index f7b79356a04..65a9a281396 100644
--- a/src/mongo/db/storage/kv/kv_engine.h
+++ b/src/mongo/db/storage/kv/kv_engine.h
@@ -261,9 +261,26 @@ public:
virtual void setInitialDataTimestamp(Timestamp initialDataTimestamp) {}
/**
+ * See `StorageEngine::setOldestTimestampFromStable`
+ */
+ virtual void setOldestTimestampFromStable() {}
+
+ /**
* See `StorageEngine::setOldestTimestamp`
*/
- virtual void setOldestTimestamp(Timestamp oldestTimestamp) {}
+ virtual void setOldestTimestamp(Timestamp newOldestTimestamp) {}
+
+ /**
+ * See `StorageEngine::isCacheUnderPressure()`
+ */
+ virtual bool isCacheUnderPressure(OperationContext* opCtx) const {
+ return false;
+ }
+
+ /**
+ * See 'StorageEngine::setCachePressureForTest()'
+ */
+ virtual void setCachePressureForTest(int pressure) {}
/**
* See `StorageEngine::supportsRecoverToStableTimestamp`
diff --git a/src/mongo/db/storage/kv/kv_engine_test_harness.h b/src/mongo/db/storage/kv/kv_engine_test_harness.h
index 64edb26da44..5191a2d344c 100644
--- a/src/mongo/db/storage/kv/kv_engine_test_harness.h
+++ b/src/mongo/db/storage/kv/kv_engine_test_harness.h
@@ -37,6 +37,14 @@ namespace mongo {
class ClockSource;
+/**
+ * Creates a harness for generic KVEngine testing of all KVEngine implementations.
+ *
+ * A particular KVHarnessHelper implementation (with a particular KVEngine implementation) will
+ * implement registerFactory() and create() such that generic unit tests can create() and test the
+ * particular KVHarnessHelper implementation. This library can be pulled into a particular
+ * implementation's CppUnitTest to exercise the generic test coverage on that implementation.
+ */
class KVHarnessHelper {
public:
virtual ~KVHarnessHelper() {}
diff --git a/src/mongo/db/storage/kv/kv_storage_engine.cpp b/src/mongo/db/storage/kv/kv_storage_engine.cpp
index 58f868d4a8f..d954893a2a7 100644
--- a/src/mongo/db/storage/kv/kv_storage_engine.cpp
+++ b/src/mongo/db/storage/kv/kv_storage_engine.cpp
@@ -509,8 +509,20 @@ void KVStorageEngine::setInitialDataTimestamp(Timestamp initialDataTimestamp) {
_engine->setInitialDataTimestamp(initialDataTimestamp);
}
-void KVStorageEngine::setOldestTimestamp(Timestamp oldestTimestamp) {
- _engine->setOldestTimestamp(oldestTimestamp);
+void KVStorageEngine::setOldestTimestampFromStable() {
+ _engine->setOldestTimestampFromStable();
+}
+
+void KVStorageEngine::setOldestTimestamp(Timestamp newOldestTimestamp) {
+ _engine->setOldestTimestamp(newOldestTimestamp);
+}
+
+bool KVStorageEngine::isCacheUnderPressure(OperationContext* opCtx) const {
+ return _engine->isCacheUnderPressure(opCtx);
+}
+
+void KVStorageEngine::setCachePressureForTest(int pressure) {
+ return _engine->setCachePressureForTest(pressure);
}
bool KVStorageEngine::supportsRecoverToStableTimestamp() const {
diff --git a/src/mongo/db/storage/kv/kv_storage_engine.h b/src/mongo/db/storage/kv/kv_storage_engine.h
index 79ae83adf52..6ffe83b5210 100644
--- a/src/mongo/db/storage/kv/kv_storage_engine.h
+++ b/src/mongo/db/storage/kv/kv_storage_engine.h
@@ -117,7 +117,13 @@ public:
virtual void setInitialDataTimestamp(Timestamp initialDataTimestamp) override;
- virtual void setOldestTimestamp(Timestamp oldestTimestamp) override;
+ virtual void setOldestTimestampFromStable() override;
+
+ virtual void setOldestTimestamp(Timestamp newOldestTimestamp) override;
+
+ virtual bool isCacheUnderPressure(OperationContext* opCtx) const override;
+
+ virtual void setCachePressureForTest(int pressure) override;
virtual bool supportsRecoverToStableTimestamp() const override;
diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h
index 64f19f5dadf..99cc8ebd76f 100644
--- a/src/mongo/db/storage/storage_engine.h
+++ b/src/mongo/db/storage/storage_engine.h
@@ -371,10 +371,39 @@ public:
virtual void setInitialDataTimestamp(Timestamp timestamp) {}
/**
+ * Uses the current stable timestamp to set the oldest timestamp for which the storage engine
+ * must maintain snapshot history through.
+ *
+ * oldest_timestamp will be set to stable_timestamp adjusted by
+ * 'targetSnapshotHistoryWindowInSeconds' to create a window of available snapshots on the
+ * storage engine from oldest to stable. Furthermore, oldest_timestamp will never be set ahead
+ * of the oplog read timestamp, ensuring the oplog reader's 'read_timestamp' can always be
+ * serviced.
+ */
+ virtual void setOldestTimestampFromStable() {}
+
+ /**
* Sets the oldest timestamp for which the storage engine must maintain snapshot history
* through. Additionally, all future writes must be newer or equal to this value.
*/
- virtual void setOldestTimestamp(Timestamp timestampa) {}
+ virtual void setOldestTimestamp(Timestamp timestamp) {}
+
+ /**
+ * Indicates whether the storage engine cache is under pressure.
+ *
+ * Retrieves a cache pressure value in the range [0, 100] from the storage engine, and compares
+ * it against storageGlobalParams.cachePressureThreshold, a dynamic server parameter, to
+ * determine whether cache pressure is too high.
+ */
+ virtual bool isCacheUnderPressure(OperationContext* opCtx) const {
+ return false;
+ }
+
+ /**
+ * For unit tests only. Sets the cache pressure value with which isCacheUnderPressure()
+ * evalutates to 'pressure'.
+ */
+ virtual void setCachePressureForTest(int pressure) {}
/**
* Notifies the storage engine that a replication batch has completed.
diff --git a/src/mongo/db/storage/test_harness_helper.h b/src/mongo/db/storage/test_harness_helper.h
index 8ce241bcb3e..c0891f0235d 100644
--- a/src/mongo/db/storage/test_harness_helper.h
+++ b/src/mongo/db/storage/test_harness_helper.h
@@ -43,6 +43,15 @@
#include "mongo/util/unowned_ptr.h"
namespace mongo {
+
+/**
+ * Sets up an OperationContext with a Recovery Unit. Uses a ServiceContextNoop.
+ *
+ * A particular HarnessHelper implementation will implement registerHarnessHelperFactory() and
+ * newHarnessHelper() such that generic unit tests can create and test that particular
+ * HarnessHelper implementation. The newRecoveryUnit() implementation dictates what RecoveryUnit
+ * implementation the OperationContext has.
+ */
class HarnessHelper {
public:
virtual ~HarnessHelper() = 0;
@@ -72,9 +81,9 @@ public:
return &_serviceContext;
}
-private:
virtual std::unique_ptr<RecoveryUnit> newRecoveryUnit() = 0;
+private:
ServiceContextNoop _serviceContext;
ServiceContext::UniqueClient _client = _serviceContext.makeClient("hh");
};
diff --git a/src/mongo/db/storage/wiredtiger/SConscript b/src/mongo/db/storage/wiredtiger/SConscript
index aee0fb5d8a5..9d42061472d 100644
--- a/src/mongo/db/storage/wiredtiger/SConscript
+++ b/src/mongo/db/storage/wiredtiger/SConscript
@@ -80,6 +80,7 @@ if wiredtiger:
'storage_wiredtiger_customization_hooks',
],
LIBDEPS_PRIVATE= [
+ '$BUILD_DIR/mongo/db/snapshot_window_options',
'$BUILD_DIR/mongo/util/options_parser/options_parser',
],
)
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index 5a5c200abaa..483d50ea9df 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -65,6 +65,7 @@
#include "mongo/db/server_options.h"
#include "mongo/db/server_parameters.h"
#include "mongo/db/service_context.h"
+#include "mongo/db/snapshot_window_options.h"
#include "mongo/db/storage/journal_listener.h"
#include "mongo/db/storage/storage_options.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.h"
@@ -1153,11 +1154,6 @@ bool WiredTigerKVEngine::initRsOplogBackgroundThread(StringData ns) {
return initRsOplogBackgroundThreadCallback(ns);
}
-void WiredTigerKVEngine::setOldestTimestamp(Timestamp oldestTimestamp) {
- constexpr bool doForce = true;
- _setOldestTimestamp(oldestTimestamp, doForce);
-}
-
namespace {
MONGO_FP_DECLARE(WTPreserveSnapshotHistoryIndefinitely);
@@ -1196,35 +1192,58 @@ void WiredTigerKVEngine::setStableTimestamp(Timestamp stableTimestamp) {
}
invariant(static_cast<std::size_t>(size) < sizeof(stableTSConfigString));
invariantWTOK(_conn->set_timestamp(_conn, stableTSConfigString));
+ {
+ stdx::lock_guard<stdx::mutex> lock(_stableTimestampMutex);
+ // set_timestamp above ignores backwards in time without force.
+ if (_stableTimestamp < stableTimestamp) {
+ _stableTimestamp = stableTimestamp;
+ }
+ }
if (_checkpointThread) {
_checkpointThread->setStableTimestamp(stableTimestamp);
}
- // Communicate to WiredTiger that it can clean up timestamp data earlier than the timestamp
- // provided. No future queries will need point-in-time reads at a timestamp prior to the one
- // provided here.
+ // Forward the oldest timestamp so that WiredTiger can clean up earlier timestamp data.
if (!MONGO_FAIL_POINT(WTPreserveSnapshotHistoryIndefinitely)) {
- _setOldestTimestamp(stableTimestamp);
+ setOldestTimestampFromStable();
}
}
-void WiredTigerKVEngine::_setOldestTimestamp(Timestamp oldestTimestamp, bool force) {
+void WiredTigerKVEngine::setOldestTimestampFromStable() {
+ Timestamp stableTimestamp;
+ {
+ stdx::lock_guard<stdx::mutex> lock(_stableTimestampMutex);
+ stableTimestamp = _stableTimestamp;
+ }
- if (oldestTimestamp == Timestamp()) {
- // Nothing to set yet.
+ // Calculate what the oldest_timestamp should be from the stable_timestamp. The oldest
+ // timestamp should lag behind stable by 'targetSnapshotHistoryWindowInSeconds' to create a
+ // window of available snapshots. If the lag window is not yet large enough, we will not
+ // update/forward the oldest_timestamp yet and instead return early.
+ Timestamp newOldestTimestamp = _calculateHistoryLagFromStableTimestamp(stableTimestamp);
+ if (newOldestTimestamp.isNull()) {
return;
}
+
const auto oplogReadTimestamp = Timestamp(_oplogManager->getOplogReadTimestamp());
- if (!force && !oplogReadTimestamp.isNull() && oldestTimestamp > oplogReadTimestamp) {
+ if (!oplogReadTimestamp.isNull() && newOldestTimestamp > oplogReadTimestamp) {
// Oplog visibility is updated asynchronously from replication updating the commit point.
// When force is not set, lag the `oldest_timestamp` to the possibly stale oplog read
// timestamp value. This guarantees an oplog reader's `read_timestamp` can always
// be serviced. When force is set, we respect the caller's request and do not lag the
// oldest timestamp.
- oldestTimestamp = oplogReadTimestamp;
+ newOldestTimestamp = oplogReadTimestamp;
}
+ _setOldestTimestamp(newOldestTimestamp, false);
+}
+
+void WiredTigerKVEngine::setOldestTimestamp(Timestamp newOldestTimestamp) {
+ _setOldestTimestamp(newOldestTimestamp, true);
+}
+
+void WiredTigerKVEngine::_setOldestTimestamp(Timestamp newOldestTimestamp, bool force) {
char oldestTSConfigString["force=true,oldest_timestamp=,commit_timestamp="_sd.size() +
(2 * 8 * 2) /* 2 timestamps of 16 hexadecimal digits each */ +
1 /* trailing null */];
@@ -1233,13 +1252,13 @@ void WiredTigerKVEngine::_setOldestTimestamp(Timestamp oldestTimestamp, bool for
size = std::snprintf(oldestTSConfigString,
sizeof(oldestTSConfigString),
"force=true,oldest_timestamp=%llx,commit_timestamp=%llx",
- oldestTimestamp.asULL(),
- oldestTimestamp.asULL());
+ newOldestTimestamp.asULL(),
+ newOldestTimestamp.asULL());
} else {
size = std::snprintf(oldestTSConfigString,
sizeof(oldestTSConfigString),
"oldest_timestamp=%llx",
- oldestTimestamp.asULL());
+ newOldestTimestamp.asULL());
}
if (size < 0) {
int e = errno;
@@ -1248,14 +1267,51 @@ void WiredTigerKVEngine::_setOldestTimestamp(Timestamp oldestTimestamp, bool for
}
invariant(static_cast<std::size_t>(size) < sizeof(oldestTSConfigString));
invariantWTOK(_conn->set_timestamp(_conn, oldestTSConfigString));
+ {
+ stdx::lock_guard<stdx::mutex> lock(_oldestTimestampMutex);
+ // set_timestamp above ignores backwards in time without force.
+ if (force) {
+ _oldestTimestamp = newOldestTimestamp;
+ } else if (_oldestTimestamp < newOldestTimestamp) {
+ _oldestTimestamp = newOldestTimestamp;
+ }
+ }
if (force) {
- LOG(2) << "oldest_timestamp and commit_timestamp force set to " << oldestTimestamp;
+ LOG(2) << "oldest_timestamp and commit_timestamp force set to " << newOldestTimestamp;
} else {
- LOG(2) << "oldest_timestamp set to " << oldestTimestamp;
+ LOG(2) << "oldest_timestamp set to " << newOldestTimestamp;
}
}
+Timestamp WiredTigerKVEngine::_calculateHistoryLagFromStableTimestamp(Timestamp stableTimestamp) {
+
+ // The oldest_timestamp should lag behind the stable_timestamp by
+ // 'targetSnapshotHistoryWindowInSeconds' seconds.
+
+ if (stableTimestamp.getSecs() <
+ static_cast<unsigned>(snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load())) {
+ // The history window is larger than the timestamp history thus far. We must wait for
+ // the history to reach the window size before moving oldest_timestamp forward.
+ return Timestamp();
+ }
+
+ Timestamp calculatedOldestTimestamp(
+ stableTimestamp.getSecs() -
+ snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load(),
+ stableTimestamp.getInc());
+ {
+ stdx::lock_guard<stdx::mutex> lock(_oldestTimestampMutex);
+ if (calculatedOldestTimestamp <= _oldestTimestamp) {
+ // The stable_timestamp is not far enough ahead of the oldest_timestamp for the
+ // oldest_timestamp to be moved forward: the window is still too small.
+ return Timestamp();
+ }
+ }
+
+ return calculatedOldestTimestamp;
+}
+
void WiredTigerKVEngine::setInitialDataTimestamp(Timestamp initialDataTimestamp) {
if (_checkpointThread) {
_checkpointThread->setInitialDataTimestamp(initialDataTimestamp);
@@ -1380,4 +1436,24 @@ void WiredTigerKVEngine::replicationBatchIsComplete() const {
_oplogManager->triggerJournalFlush();
}
+bool WiredTigerKVEngine::isCacheUnderPressure(OperationContext* opCtx) const {
+ WiredTigerSession* session = WiredTigerRecoveryUnit::get(opCtx)->getSessionNoTxn();
+ invariant(session);
+
+ int64_t score = uassertStatusOK(WiredTigerUtil::getStatisticsValueAs<int64_t>(
+ session->getSession(), "statistics:", "", WT_STAT_CONN_CACHE_LOOKASIDE_SCORE));
+
+ return (score >= snapshotWindowParams.cachePressureThreshold.load());
+}
+
+Timestamp WiredTigerKVEngine::getStableTimestamp() const {
+ stdx::lock_guard<stdx::mutex> lock(_stableTimestampMutex);
+ return _stableTimestamp;
+}
+
+Timestamp WiredTigerKVEngine::getOldestTimestamp() const {
+ stdx::lock_guard<stdx::mutex> lock(_oldestTimestampMutex);
+ return _oldestTimestamp;
+}
+
} // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
index 98806dd4222..6d62e8663b7 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
@@ -180,11 +180,9 @@ public:
virtual void setInitialDataTimestamp(Timestamp initialDataTimestamp) override;
- /**
- * This method will force the oldest timestamp to the input value. Callers must be serialized
- * along with `setStableTimestamp`
- */
- void setOldestTimestamp(Timestamp oldestTimestamp);
+ virtual void setOldestTimestampFromStable() override;
+
+ virtual void setOldestTimestamp(Timestamp newOldestTimestamp) override;
virtual bool supportsRecoverToStableTimestamp() const override;
@@ -254,7 +252,7 @@ public:
/**
* Sets the implementation for `initRsOplogBackgroundThread` (allowing tests to skip the
- * background job, for example). Intended to be called from a MONGO_INITIALIZER and therefroe in
+ * background job, for example). Intended to be called from a MONGO_INITIALIZER and therefore in
* a single threaded context.
*/
static void setInitRsOplogBackgroundThreadCallback(stdx::function<bool(StringData)> cb);
@@ -269,6 +267,15 @@ public:
static void appendGlobalStats(BSONObjBuilder& b);
+ bool isCacheUnderPressure(OperationContext* opCtx) const override;
+
+ /**
+ * These are timestamp access functions for serverStatus to be able to report the actual
+ * snapshot window size.
+ */
+ Timestamp getStableTimestamp() const;
+ Timestamp getOldestTimestamp() const;
+
private:
class WiredTigerJournalFlusher;
class WiredTigerCheckpointThread;
@@ -280,7 +287,22 @@ private:
std::string _uri(StringData ident) const;
- void _setOldestTimestamp(Timestamp oldestTimestamp, bool force = false);
+ /**
+ * Uses the 'stableTimestamp', the 'targetSnapshotHistoryWindowInSeconds' setting and the
+ * current _oldestTimestamp to calculate what the new oldest_timestamp should be, in order to
+ * maintain a window of available snapshots on the storage engine from oldest to stable
+ * timestamp.
+ *
+ * If the returned Timestamp isNull(), oldest_timestamp should not be moved forward.
+ */
+ Timestamp _calculateHistoryLagFromStableTimestamp(Timestamp stableTimestamp);
+
+ /**
+ * Sets the oldest timestamp for which the storage engine must maintain snapshot history
+ * through. If force is true, oldest will be set to the given input value, unmodified, even if
+ * it is backwards in time from the last oldest timestamp (accomodating initial sync).
+ */
+ void _setOldestTimestamp(Timestamp newOldestTimestamp, bool force);
WT_CONNECTION* _conn;
WT_EVENT_HANDLER _eventHandler;
@@ -319,5 +341,13 @@ private:
std::unique_ptr<WiredTigerSession> _backupSession;
Timestamp _recoveryTimestamp;
WiredTigerFileVersion _fileVersion;
+
+ // Ensures accesses to _oldestTimestamp and _stableTimestamp, respectively, are multi-core safe.
+ mutable stdx::mutex _oldestTimestampMutex;
+ mutable stdx::mutex _stableTimestampMutex;
+
+ // Tracks the stable and oldest timestamps we've set on the storage engine.
+ Timestamp _oldestTimestamp;
+ Timestamp _stableTimestamp;
};
}
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
index dce16581223..75dc1b4d029 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
@@ -105,6 +105,10 @@ public:
return std::move(ret);
}
+ WiredTigerKVEngine* getEngine() {
+ return &_engine;
+ }
+
private:
unittest::TempDir _dbpath;
ClockSourceMock _cs;
@@ -142,14 +146,14 @@ public:
}
void setUp() override {
- harnessHelper = newRecoveryUnitHarnessHelper();
+ harnessHelper = std::make_unique<WiredTigerRecoveryUnitHarnessHelper>();
clientAndCtx1 = makeClientAndOpCtx(harnessHelper.get(), "writer");
clientAndCtx2 = makeClientAndOpCtx(harnessHelper.get(), "reader");
ru1 = checked_cast<WiredTigerRecoveryUnit*>(clientAndCtx1.second->recoveryUnit());
ru2 = checked_cast<WiredTigerRecoveryUnit*>(clientAndCtx2.second->recoveryUnit());
}
- std::unique_ptr<RecoveryUnitHarnessHelper> harnessHelper;
+ std::unique_ptr<WiredTigerRecoveryUnitHarnessHelper> harnessHelper;
ClientAndCtx clientAndCtx1, clientAndCtx2;
WiredTigerRecoveryUnit *ru1, *ru2;
@@ -163,6 +167,50 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, SetReadSource) {
ASSERT_EQ(RecoveryUnit::ReadSource::kProvided, ru1->getTimestampReadSource());
ASSERT_EQ(Timestamp(1, 1), ru1->getPointInTimeReadTimestamp());
}
+
+TEST_F(WiredTigerRecoveryUnitTestFixture, CreateAndCheckForCachePressure) {
+ int time = 1;
+
+ // Reconfigure the size of the cache to be very small so that building cache pressure is fast.
+ WiredTigerKVEngine* engine = harnessHelper->getEngine();
+ std::string cacheSizeReconfig = "cache_size=1MB";
+ ASSERT_EQ(engine->reconfigure(cacheSizeReconfig.c_str()), 0);
+
+ OperationContext* opCtx = clientAndCtx1.second.get();
+ std::unique_ptr<RecordStore> rs(harnessHelper->createRecordStore(opCtx, "a.b"));
+
+ // Insert one document so that we can then update it in a loop to create cache pressure.
+ // Note: inserts will not create cache pressure.
+ WriteUnitOfWork wu(opCtx);
+ ASSERT_OK(ru1->setTimestamp(Timestamp(time++)));
+ std::string str = str::stream() << "foobarbaz";
+ StatusWith<RecordId> ress =
+ rs->insertRecord(opCtx, str.c_str(), str.size() + 1, Timestamp(), false);
+ ASSERT_OK(ress.getStatus());
+ auto recordId = ress.getValue();
+ wu.commit();
+
+ for (int j = 0; j < 1000; ++j) {
+ // Once we hit the cache pressure threshold, i.e. have successfully created cache pressure
+ // that is detectable, we are done.
+ if (engine->isCacheUnderPressure(opCtx)) {
+ invariant(j != 0);
+ break;
+ }
+
+ try {
+ WriteUnitOfWork wuow(opCtx);
+ ASSERT_OK(ru1->setTimestamp(Timestamp(time++)));
+ std::string s = str::stream()
+ << "abcbcdcdedefefgfghghihijijkjklklmlmnmnomopopqpqrqrsrststutuv" << j;
+ ASSERT_OK(rs->updateRecord(opCtx, recordId, s.c_str(), s.size() + 1, false, nullptr));
+ wuow.commit();
+ } catch (const DBException& ex) {
+ invariant(ex.toStatus().code() == ErrorCodes::WriteConflict);
+ }
+ }
+}
+
TEST_F(WiredTigerRecoveryUnitTestFixture,
LocalReadOnADocumentBeingPreparedTriggersPrepareConflict) {
// Prepare but don't commit a transaction
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp
index 3a58b4f91d9..4cd9af2244f 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp
@@ -42,7 +42,6 @@
#include "mongo/db/storage/wiredtiger/wiredtiger_session_cache.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_util.h"
#include "mongo/util/assert_util.h"
-#include "mongo/util/scopeguard.h"
namespace mongo {
@@ -79,6 +78,8 @@ BSONObj WiredTigerServerStatusSection::generateSection(OperationContext* opCtx,
WiredTigerKVEngine::appendGlobalStats(bob);
+ WiredTigerUtil::appendSnapshotWindowSettings(_engine, session, &bob);
+
return bob.obj();
}
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp
index bfa26ed8772..7234190b222 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp
@@ -39,6 +39,8 @@
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/concurrency/write_conflict_exception.h"
#include "mongo/db/server_parameters.h"
+#include "mongo/db/snapshot_window_options.h"
+#include "mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_session_cache.h"
#include "mongo/util/assert_util.h"
@@ -591,4 +593,36 @@ Status WiredTigerUtil::exportTableToBSON(WT_SESSION* session,
return Status::OK();
}
+void WiredTigerUtil::appendSnapshotWindowSettings(WiredTigerKVEngine* engine,
+ WiredTigerSession* session,
+ BSONObjBuilder* bob) {
+ invariant(engine);
+ invariant(session);
+ invariant(bob);
+
+ const Timestamp& stableTimestamp = engine->getStableTimestamp();
+ const Timestamp& oldestTimestamp = engine->getOldestTimestamp();
+
+ const unsigned currentAvailableSnapshotWindow =
+ stableTimestamp.getSecs() - oldestTimestamp.getSecs();
+
+ int64_t score = uassertStatusOK(WiredTigerUtil::getStatisticsValueAs<int64_t>(
+ session->getSession(), "statistics:", "", WT_STAT_CONN_CACHE_LOOKASIDE_SCORE));
+
+ BSONObjBuilder settings(bob->subobjStart("snapshot-window-settings"));
+ settings.append("cache pressure percentage threshold",
+ snapshotWindowParams.cachePressureThreshold.load());
+ settings.append("current cache pressure percentage", score);
+ settings.append("max target available snapshots window size in seconds",
+ snapshotWindowParams.maxTargetSnapshotHistoryWindowInSeconds.load());
+ settings.append("target available snapshots window size in seconds",
+ snapshotWindowParams.targetSnapshotHistoryWindowInSeconds.load());
+ settings.append("current available snapshots window size in seconds",
+ currentAvailableSnapshotWindow);
+ settings.append("latest majority snapshot timestamp available",
+ stableTimestamp.toStringPretty());
+ settings.append("oldest majority snapshot timestamp available",
+ oldestTimestamp.toStringPretty());
+}
+
} // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.h b/src/mongo/db/storage/wiredtiger/wiredtiger_util.h
index 5817a5855f6..4b09387a157 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.h
@@ -45,6 +45,8 @@ namespace mongo {
class BSONObjBuilder;
class OperationContext;
class WiredTigerConfigParser;
+class WiredTigerKVEngine;
+class WiredTigerSession;
inline bool wt_keeptxnopen() {
return false;
@@ -116,6 +118,24 @@ public:
BSONObjBuilder* bob);
/**
+ * Appends information about the storage engine's currently available snapshots and the settings
+ * that affect that window of maintained history.
+ *
+ * "snapshot-window-settings" : {
+ * "cache pressure percentage threshold" : <num>,
+ * "current cache pressure percentage" : <num>,
+ * "max target available snapshots window size in seconds" : <num>,
+ * "target available snapshots window size in seconds" : <num>,
+ * "current available snapshots window size in seconds" : <num>,
+ * "latest majority snapshot timestamp available" : <num>,
+ * "oldest majority snapshot timestamp available" : <num>
+ * }
+ */
+ static void appendSnapshotWindowSettings(WiredTigerKVEngine* engine,
+ WiredTigerSession* session,
+ BSONObjBuilder* bob);
+
+ /**
* Gets entire metadata string for collection/index at URI.
*/
static StatusWith<std::string> getMetadata(OperationContext* opCtx, StringData uri);