diff options
author | Dianna Hohensee <dianna.hohensee@mongodb.com> | 2020-05-28 20:10:48 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-06-05 14:11:49 +0000 |
commit | fd2410489eab2186e45c1706bbefb37084b782c4 (patch) | |
tree | 375d7e77ac038fe4a57f20b7fbbce654e5291f6b | |
parent | 246dc30d9cae7f240e1bbd8d58f89a3289615fb1 (diff) | |
download | mongo-fd2410489eab2186e45c1706bbefb37084b782c4.tar.gz |
SERVER-45642 Fix WiredTigerCheckpointThread to skip checkpointing if syncdelay is set to 0; disallow users from setting syncdelay values greater than an hour; and stop spurious checkpoint thread wakeups.
Also switch the JS tests that used high syncdelay values to use the now-working value of 0 to disable checkpointing.
(cherry picked from commit 8ee572abe5a47d3d49e7436c1a376ac395517add)
-rw-r--r-- | jstests/noPassthrough/parameters.js | 2 | ||||
-rw-r--r-- | jstests/replsets/reconfig_waits_for_config_durability.js | 4 | ||||
-rw-r--r-- | jstests/replsets/trigger_initial_stable_checkpoint.js | 4 | ||||
-rw-r--r-- | src/mongo/db/mongod_options_storage.idl | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_options.h | 4 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp | 60 |
6 files changed, 50 insertions, 26 deletions
diff --git a/jstests/noPassthrough/parameters.js b/jstests/noPassthrough/parameters.js index 8ad2eb125d3..0844cf76b99 100644 --- a/jstests/noPassthrough/parameters.js +++ b/jstests/noPassthrough/parameters.js @@ -36,7 +36,7 @@ setAndCheckParameter(dbConn, "traceExceptions", 0, false); setAndCheckParameter(dbConn, "traceExceptions", "foo", true); setAndCheckParameter(dbConn, "traceExceptions", "", true); setAndCheckParameter(dbConn, "syncdelay", 0); -setAndCheckParameter(dbConn, "syncdelay", 8000); +setAndCheckParameter(dbConn, "syncdelay", 3000); function ensureSetParameterFailure(dbConn, parameterName, newValue, reason) { jsTest.log("Test setting parameter: " + parameterName + " to invalid value: " + newValue); diff --git a/jstests/replsets/reconfig_waits_for_config_durability.js b/jstests/replsets/reconfig_waits_for_config_durability.js index 929e96035d0..f8ac380b94e 100644 --- a/jstests/replsets/reconfig_waits_for_config_durability.js +++ b/jstests/replsets/reconfig_waits_for_config_durability.js @@ -16,8 +16,8 @@ load("jstests/libs/fail_point_util.js"); const rst = new ReplSetTest({ nodes: [{}, {rsConfig: {priority: 0}}], nodeOptions: { - // Turn up the syncdelay (in seconds) to effectively disable background checkpoints. - syncdelay: 600, + // Disable background checkpoints: a zero value disables checkpointing. + syncdelay: 0, setParameter: {logComponentVerbosity: tojson({storage: 2})} }, useBridge: true diff --git a/jstests/replsets/trigger_initial_stable_checkpoint.js b/jstests/replsets/trigger_initial_stable_checkpoint.js index bbc07b6f38a..735f8e7a562 100644 --- a/jstests/replsets/trigger_initial_stable_checkpoint.js +++ b/jstests/replsets/trigger_initial_stable_checkpoint.js @@ -8,8 +8,8 @@ const rst = new ReplSetTest({ nodes: 1, nodeOptions: { - // Turn up the syncdelay (in seconds) to effectively disable background checkpoints. - syncdelay: 600, + // Disable background checkpoints: a zero value disables checkpointing. 
+ syncdelay: 0, setParameter: {logComponentVerbosity: tojson({storage: 2})} } }); diff --git a/src/mongo/db/mongod_options_storage.idl b/src/mongo/db/mongod_options_storage.idl index 116ad5ecacf..b0de309fc42 100644 --- a/src/mongo/db/mongod_options_storage.idl +++ b/src/mongo/db/mongod_options_storage.idl @@ -84,7 +84,7 @@ configs: hidden: true 'storage.syncPeriodSecs': - description: 'Seconds between disk syncs (0=never, but not recommended)' + description: 'Seconds between disk syncs' short_name: syncdelay arg_vartype: Double default: 60.0 diff --git a/src/mongo/db/storage/storage_options.h b/src/mongo/db/storage/storage_options.h index 4202cbc10ef..2d7e1a21ba7 100644 --- a/src/mongo/db/storage/storage_options.h +++ b/src/mongo/db/storage/storage_options.h @@ -95,8 +95,8 @@ struct StorageGlobalParams { // via an fsync operation. // Do not set this value on production systems. // In almost every situation, you should use the default setting. - static constexpr double kMaxSyncdelaySecs = 9.0 * 1000.0 * 1000.0; - AtomicDouble syncdelay; // seconds between fsyncs + static constexpr double kMaxSyncdelaySecs = 60 * 60; // 1hr + AtomicDouble syncdelay; // seconds between fsyncs // --queryableBackupMode // Puts MongoD into "read-only" mode. 
MongoD will not write any data to the underlying diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 35eb5712744..811c5757cbd 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -434,26 +434,43 @@ public: virtual void run() { ThreadClient tc(name(), getGlobalServiceContext()); - LOGV2_DEBUG(22307, 1, "starting {name} thread", "name"_attr = name()); + LOGV2_DEBUG(22307, 1, "Starting thread", "threadName"_attr = name()); - while (!_shuttingDown.load()) { + while (true) { auto opCtx = tc->makeOperationContext(); { stdx::unique_lock<Latch> lock(_mutex); MONGO_IDLE_THREAD_BLOCK; + + // Wait for 'wiredTigerGlobalOptions.checkpointDelaySecs' seconds; or until either + // shutdown is signaled or a checkpoint is triggered. _condvar.wait_for(lock, stdx::chrono::seconds(static_cast<std::int64_t>( - wiredTigerGlobalOptions.checkpointDelaySecs))); - } + wiredTigerGlobalOptions.checkpointDelaySecs)), + [&] { return _shuttingDown || _triggerCheckpoint; }); + + // If the checkpointDelaySecs is set to 0, that means we should skip checkpointing. + // However, checkpointDelaySecs is adjustable by a runtime server parameter, so we + // need to wake up to check periodically. The wakeup to check period is arbitrary. + while (wiredTigerGlobalOptions.checkpointDelaySecs == 0 && !_shuttingDown && + !_triggerCheckpoint) { + _condvar.wait_for(lock, + stdx::chrono::seconds(static_cast<std::int64_t>(3)), + [&] { return _shuttingDown || _triggerCheckpoint; }); + } - pauseCheckpointThread.pauseWhileSet(); + if (_shuttingDown) { + LOGV2_DEBUG(22309, 1, "Stopping thread", "threadName"_attr = name()); + return; + } - // Might have been awakened by another thread shutting us down. - if (_shuttingDown.load()) { - break; + // Clear the trigger so we do not immediately checkpoint again after this. 
+ _triggerCheckpoint = false; } + pauseCheckpointThread.pauseWhileSet(); + const Date_t startTime = Date_t::now(); const Timestamp stableTimestamp = _wiredTigerKVEngine->getStableTimestamp(); @@ -540,13 +557,13 @@ public: invariant(ErrorCodes::isShutdownError(exc.code()), exc.what()); } } - LOGV2_DEBUG(22309, 1, "stopping {name} thread", "name"_attr = name()); } /** * Returns true if we have already triggered taking the first checkpoint. */ bool hasTriggeredFirstStableCheckpoint() { + stdx::unique_lock<Latch> lock(_mutex); return _hasTriggeredFirstStableCheckpoint; } @@ -563,9 +580,9 @@ public: void triggerFirstStableCheckpoint(Timestamp prevStable, Timestamp initialData, Timestamp currStable) { + stdx::unique_lock<Latch> lock(_mutex); invariant(!_hasTriggeredFirstStableCheckpoint); if (prevStable < initialData && currStable >= initialData) { - _hasTriggeredFirstStableCheckpoint = true; LOGV2(22310, "Triggering the first stable checkpoint. Initial Data: {initialData} PrevStable: " "{prevStable} CurrStable: {currStable}", @@ -573,7 +590,8 @@ public: "initialData"_attr = initialData, "prevStable"_attr = prevStable, "currStable"_attr = currStable); - stdx::unique_lock<Latch> lock(_mutex); + _hasTriggeredFirstStableCheckpoint = true; + _triggerCheckpoint = true; _condvar.notify_one(); } } @@ -592,9 +610,9 @@ public: } void shutdown() { - _shuttingDown.store(true); { stdx::unique_lock<Latch> lock(_mutex); + _shuttingDown = true; // Wake up the checkpoint thread early, to take a final checkpoint before shutting // down, if one has not coincidentally just been taken. _condvar.notify_one(); @@ -606,19 +624,25 @@ private: WiredTigerKVEngine* _wiredTigerKVEngine; WiredTigerSessionCache* _sessionCache; + Mutex _oplogNeededForCrashRecoveryMutex = + MONGO_MAKE_LATCH("WiredTigerCheckpointThread::_oplogNeededForCrashRecoveryMutex"); + AtomicWord<std::uint64_t> _oplogNeededForCrashRecovery; + + // Protects the state below. 
Mutex _mutex = MONGO_MAKE_LATCH("WiredTigerCheckpointThread::_mutex"); - ; // protects _condvar + // The checkpoint thread idles on this condition variable for a particular time duration between - // taking checkpoints. It can be triggered early to expediate immediate checkpointing. + // taking checkpoints. It can be triggered early to expedite either: immediate checkpointing if + // _triggerCheckpoint is set; or shutdown cleanup if _shuttingDown is set. stdx::condition_variable _condvar; - AtomicWord<bool> _shuttingDown{false}; + bool _shuttingDown = false; + // This flag ensures the first stable checkpoint is only triggered once. bool _hasTriggeredFirstStableCheckpoint = false; - Mutex _oplogNeededForCrashRecoveryMutex = - MONGO_MAKE_LATCH("WiredTigerCheckpointThread::_oplogNeededForCrashRecoveryMutex"); - AtomicWord<std::uint64_t> _oplogNeededForCrashRecovery; + // This flag allows the checkpoint thread to wake up early when _condvar is signaled. + bool _triggerCheckpoint = false; }; namespace { |