From 8ee572abe5a47d3d49e7436c1a376ac395517add Mon Sep 17 00:00:00 2001 From: Dianna Hohensee Date: Thu, 28 May 2020 20:10:48 -0400 Subject: SERVER-45642 Fix WiredTigerCheckpointThread to skip checkpointing if syncdelay is set to 0; disallow users from setting syncdelay values greater than an hour; and stop spurious checkpoint thread wakeups. Also move JS tests using high syncdelay values to using the now working 0 value to disable checkpointing. --- src/mongo/db/mongod_options_storage.idl | 2 +- src/mongo/db/storage/storage_options.h | 4 +- .../db/storage/wiredtiger/wiredtiger_kv_engine.cpp | 60 +++++++++++++++------- 3 files changed, 45 insertions(+), 21 deletions(-) (limited to 'src/mongo') diff --git a/src/mongo/db/mongod_options_storage.idl b/src/mongo/db/mongod_options_storage.idl index 116ad5ecacf..b0de309fc42 100644 --- a/src/mongo/db/mongod_options_storage.idl +++ b/src/mongo/db/mongod_options_storage.idl @@ -84,7 +84,7 @@ configs: hidden: true 'storage.syncPeriodSecs': - description: 'Seconds between disk syncs (0=never, but not recommended)' + description: 'Seconds between disk syncs' short_name: syncdelay arg_vartype: Double default: 60.0 diff --git a/src/mongo/db/storage/storage_options.h b/src/mongo/db/storage/storage_options.h index 4202cbc10ef..2d7e1a21ba7 100644 --- a/src/mongo/db/storage/storage_options.h +++ b/src/mongo/db/storage/storage_options.h @@ -95,8 +95,8 @@ struct StorageGlobalParams { // via an fsync operation. // Do not set this value on production systems. // In almost every situation, you should use the default setting. - static constexpr double kMaxSyncdelaySecs = 9.0 * 1000.0 * 1000.0; - AtomicDouble syncdelay; // seconds between fsyncs + static constexpr double kMaxSyncdelaySecs = 60 * 60; // 1hr + AtomicDouble syncdelay; // seconds between fsyncs // --queryableBackupMode // Puts MongoD into "read-only" mode. MongoD will not write any data to the underlying diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index a6aa26de924..97380b509e8 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -266,26 +266,43 @@ public: virtual void run() { ThreadClient tc(name(), getGlobalServiceContext()); - LOGV2_DEBUG(22307, 1, "starting {name} thread", "name"_attr = name()); + LOGV2_DEBUG(22307, 1, "Starting thread", "threadName"_attr = name()); - while (!_shuttingDown.load()) { + while (true) { auto opCtx = tc->makeOperationContext(); { stdx::unique_lock lock(_mutex); MONGO_IDLE_THREAD_BLOCK; + + // Wait for 'wiredTigerGlobalOptions.checkpointDelaySecs' seconds; or until either + // shutdown is signaled or a checkpoint is triggered. _condvar.wait_for(lock, stdx::chrono::seconds(static_cast( - wiredTigerGlobalOptions.checkpointDelaySecs))); - } + wiredTigerGlobalOptions.checkpointDelaySecs)), + [&] { return _shuttingDown || _triggerCheckpoint; }); + + // If the checkpointDelaySecs is set to 0, that means we should skip checkpointing. + // However, checkpointDelaySecs is adjustable by a runtime server parameter, so we + // need to wake up to check periodically. The wakeup to check period is arbitrary. + while (wiredTigerGlobalOptions.checkpointDelaySecs == 0 && !_shuttingDown && + !_triggerCheckpoint) { + _condvar.wait_for(lock, + stdx::chrono::seconds(static_cast(3)), + [&] { return _shuttingDown || _triggerCheckpoint; }); + } - pauseCheckpointThread.pauseWhileSet(); + if (_shuttingDown) { + LOGV2_DEBUG(22309, 1, "Stopping thread", "threadName"_attr = name()); + return; + } - // Might have been awakened by another thread shutting us down. - if (_shuttingDown.load()) { - break; + // Clear the trigger so we do not immediately checkpoint again after this. + _triggerCheckpoint = false; } + pauseCheckpointThread.pauseWhileSet(); + const Date_t startTime = Date_t::now(); const Timestamp stableTimestamp = _wiredTigerKVEngine->getStableTimestamp(); @@ -372,13 +389,13 @@ public: invariant(ErrorCodes::isShutdownError(exc.code()), exc.what()); } } - LOGV2_DEBUG(22309, 1, "stopping {name} thread", "name"_attr = name()); } /** * Returns true if we have already triggered taking the first checkpoint. */ bool hasTriggeredFirstStableCheckpoint() { + stdx::unique_lock lock(_mutex); return _hasTriggeredFirstStableCheckpoint; } @@ -395,9 +412,9 @@ public: void triggerFirstStableCheckpoint(Timestamp prevStable, Timestamp initialData, Timestamp currStable) { + stdx::unique_lock lock(_mutex); invariant(!_hasTriggeredFirstStableCheckpoint); if (prevStable < initialData && currStable >= initialData) { - _hasTriggeredFirstStableCheckpoint = true; LOGV2(22310, "Triggering the first stable checkpoint. Initial Data: {initialData} PrevStable: " "{prevStable} CurrStable: {currStable}", @@ -405,7 +422,8 @@ public: "initialData"_attr = initialData, "prevStable"_attr = prevStable, "currStable"_attr = currStable); - stdx::unique_lock lock(_mutex); + _hasTriggeredFirstStableCheckpoint = true; + _triggerCheckpoint = true; _condvar.notify_one(); } } @@ -424,9 +442,9 @@ public: } void shutdown() { - _shuttingDown.store(true); { stdx::unique_lock lock(_mutex); + _shuttingDown = true; // Wake up the checkpoint thread early, to take a final checkpoint before shutting // down, if one has not coincidentally just been taken. _condvar.notify_one(); @@ -438,19 +456,25 @@ private: WiredTigerKVEngine* _wiredTigerKVEngine; WiredTigerSessionCache* _sessionCache; + Mutex _oplogNeededForCrashRecoveryMutex = + MONGO_MAKE_LATCH("WiredTigerCheckpointThread::_oplogNeededForCrashRecoveryMutex"); + AtomicWord _oplogNeededForCrashRecovery; + + // Protects the state below. Mutex _mutex = MONGO_MAKE_LATCH("WiredTigerCheckpointThread::_mutex"); - ; // protects _condvar + // The checkpoint thread idles on this condition variable for a particular time duration between - // taking checkpoints. It can be triggered early to expediate immediate checkpointing. + // taking checkpoints. It can be triggered early to expedite either: immediate checkpointing if + // _triggerCheckpoint is set; or shutdown cleanup if _shuttingDown is set. stdx::condition_variable _condvar; - AtomicWord _shuttingDown{false}; + bool _shuttingDown = false; + // This flag ensures the first stable checkpoint is only triggered once. bool _hasTriggeredFirstStableCheckpoint = false; - Mutex _oplogNeededForCrashRecoveryMutex = - MONGO_MAKE_LATCH("WiredTigerCheckpointThread::_oplogNeededForCrashRecoveryMutex"); - AtomicWord _oplogNeededForCrashRecovery; + // This flag allows the checkpoint thread to wake up early when _condvar is signaled. + bool _triggerCheckpoint = false; }; namespace { -- cgit v1.2.1