diff options
author | Dianna Hohensee <dianna.hohensee@mongodb.com> | 2021-04-26 22:31:27 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-03 14:42:22 +0000 |
commit | b5c352d4ae62d8078ac4f59422f87bd8633180de (patch) | |
tree | 8a12dcf4413f08770eb1e10140699437f72c4db9 | |
parent | 413af9c82279f4d6dbf802534674789cbcefae59 (diff) | |
download | mongo-b5c352d4ae62d8078ac4f59422f87bd8633180de.tar.gz |
SERVER-54005 Prevent oplog history truncation from deleting entries lte to the oplogTruncateAfterPoint
(cherry picked from commit c4065661e5c9c57e8d724243f5f07db72a827090)
-rw-r--r-- | src/mongo/db/repl/replication_consistency_markers_impl.cpp | 15 | ||||
-rw-r--r-- | src/mongo/db/repl/storage_interface.h | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/storage_interface_impl.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/storage_interface_impl.h | 3 | ||||
-rw-r--r-- | src/mongo/db/repl/storage_interface_mock.h | 3 | ||||
-rw-r--r-- | src/mongo/db/storage/biggie/biggie_kv_engine.h | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/devnull/devnull_kv_engine.h | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/kv/kv_engine.h | 5 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_engine.h | 7 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_engine_impl.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_engine_impl.h | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_engine_mock.h | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp | 17 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h | 6 |
16 files changed, 80 insertions, 5 deletions
diff --git a/src/mongo/db/repl/replication_consistency_markers_impl.cpp b/src/mongo/db/repl/replication_consistency_markers_impl.cpp index 84c1fd78e47..df8c3ebdd6d 100644 --- a/src/mongo/db/repl/replication_consistency_markers_impl.cpp +++ b/src/mongo/db/repl/replication_consistency_markers_impl.cpp @@ -395,6 +395,15 @@ void ReplicationConsistencyMarkersImpl::setOplogTruncateAfterPoint(OperationCont const Timestamp& timestamp) { fassert(40512, _setOplogTruncateAfterPoint(opCtx, timestamp)); + if (timestamp != Timestamp::min()) { + // Update the oplog pin so we don't delete oplog history past the oplogTruncateAfterPoint. + _storageInterface->setPinnedOplogTimestamp(opCtx, timestamp); + } else { + // Set Timestamp::max() to nullify the pin, rather than pinning all oplog history with a + // Timestamp::min(). + _storageInterface->setPinnedOplogTimestamp(opCtx, Timestamp::max()); + } + // If the oplogTruncateAfterPoint is manually reset via this function, then we need to clear the // cached last no-holes oplog entry. This is important so that // refreshOplogTruncateAfterPointIfPrimary always returns the latest oplog entry without @@ -546,6 +555,12 @@ ReplicationConsistencyMarkersImpl::refreshOplogTruncateAfterPointIfPrimary( 4455501, OpTimeAndWallTime::parseOpTimeAndWallTimeFromOplogEntry(truncateOplogEntryBSON.get())); + // Pass the _lastNoHolesOplogTimestamp timestamp down to the storage layer to prevent oplog + // history lte to oplogTruncateAfterPoint from being entirely deleted. There should always be a + // single oplog entry lte to the oplogTruncateAfterPoint. Otherwise there will not be a valid + // oplog entry with which to update the caller. + _storageInterface->setPinnedOplogTimestamp(opCtx, _lastNoHolesOplogTimestamp.get()); + return _lastNoHolesOplogOpTimeAndWallTime; } diff --git a/src/mongo/db/repl/storage_interface.h b/src/mongo/db/repl/storage_interface.h index ed7c42d0002..5b6f8ec07ad 100644 --- a/src/mongo/db/repl/storage_interface.h +++ b/src/mongo/db/repl/storage_interface.h @@ -473,6 +473,14 @@ public: * Returns the read timestamp of the recovery unit of the given operation context. */ virtual Timestamp getPointInTimeReadTimestamp(OperationContext* opCtx) const = 0; + + /** + * Prevents oplog history at 'pinnedTimestamp' and later from being truncated. Setting + * Timestamp::max() effectively nullifies the pin because no oplog truncation will be stopped by + * it. + */ + virtual void setPinnedOplogTimestamp(OperationContext* opCtx, + const Timestamp& pinnedTimestamp) const = 0; }; } // namespace repl diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp index bfe6df21dd1..329a0ee03a0 100644 --- a/src/mongo/db/repl/storage_interface_impl.cpp +++ b/src/mongo/db/repl/storage_interface_impl.cpp @@ -1358,5 +1358,10 @@ Timestamp StorageInterfaceImpl::getPointInTimeReadTimestamp(OperationContext* op return *readTimestamp; } +void StorageInterfaceImpl::setPinnedOplogTimestamp(OperationContext* opCtx, + const Timestamp& pinnedTimestamp) const { + opCtx->getServiceContext()->getStorageEngine()->setPinnedOplogTimestamp(pinnedTimestamp); +} + } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/storage_interface_impl.h b/src/mongo/db/repl/storage_interface_impl.h index 6e47c07071e..c9d4b7749fc 100644 --- a/src/mongo/db/repl/storage_interface_impl.h +++ b/src/mongo/db/repl/storage_interface_impl.h @@ -200,6 +200,9 @@ public: Timestamp getPointInTimeReadTimestamp(OperationContext* opCtx) const override; + void setPinnedOplogTimestamp(OperationContext* opCtx, + const Timestamp& pinnedTimestamp) const override; + private: const NamespaceString _rollbackIdNss; }; diff --git a/src/mongo/db/repl/storage_interface_mock.h b/src/mongo/db/repl/storage_interface_mock.h index 806266662f5..11bb6213690 100644 --- a/src/mongo/db/repl/storage_interface_mock.h +++ b/src/mongo/db/repl/storage_interface_mock.h @@ -354,6 +354,9 @@ public: return {}; } + void setPinnedOplogTimestamp(OperationContext* opCtx, + const Timestamp& pinnedTimestamp) const override {} + // Testing functions. CreateCollectionForBulkFn createCollectionForBulkFn = [](const NamespaceString& nss, diff --git a/src/mongo/db/storage/biggie/biggie_kv_engine.h b/src/mongo/db/storage/biggie/biggie_kv_engine.h index 2ce35d07cbf..6d117b71521 100644 --- a/src/mongo/db/storage/biggie/biggie_kv_engine.h +++ b/src/mongo/db/storage/biggie/biggie_kv_engine.h @@ -164,6 +164,8 @@ public: */ bool trySwapMaster(StringStore& newMaster, uint64_t version); + virtual void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) {} + private: std::shared_ptr<void> _catalogInfo; int _cachePressureForTest = 0; diff --git a/src/mongo/db/storage/devnull/devnull_kv_engine.h b/src/mongo/db/storage/devnull/devnull_kv_engine.h index 3f73b4e4ade..f038fb6a6fa 100644 --- a/src/mongo/db/storage/devnull/devnull_kv_engine.h +++ b/src/mongo/db/storage/devnull/devnull_kv_engine.h @@ -151,6 +151,8 @@ public: return boost::none; } + virtual void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) {} + private: std::shared_ptr<void> _catalogInfo; diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h index f505ee48655..efa6c5fd467 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h @@ -123,6 +123,8 @@ public: return boost::none; } + void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) {} + private: typedef StringMap<std::shared_ptr<void>> DataMap; diff --git a/src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp b/src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp index 7f187993d7a..327d871ea98 100644 --- a/src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp +++ b/src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp @@ -128,6 +128,8 @@ public: return boost::none; } + void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) {} + // List of idents removed using dropIdent(). std::vector<std::string> droppedIdents; diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h index 73ba5f004c6..a4621f635f0 100644 --- a/src/mongo/db/storage/kv/kv_engine.h +++ b/src/mongo/db/storage/kv/kv_engine.h @@ -476,6 +476,11 @@ public: } /** + * See `StorageEngine::setPinnedOplogTimestamp` + */ + virtual void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) = 0; + + /** * The destructor will never be called from mongod, but may be called from tests. * Engines may assume that this will only be called in the case of clean shutdown, even if * cleanShutdown() hasn't been called. diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h index ffb4d9ebfc0..dc9db28e145 100644 --- a/src/mongo/db/storage/storage_engine.h +++ b/src/mongo/db/storage/storage_engine.h @@ -634,6 +634,13 @@ public: virtual const KVEngine* getEngine() const = 0; virtual DurableCatalog* getCatalog() = 0; virtual const DurableCatalog* getCatalog() const = 0; + + /** + * Prevents oplog history at 'pinnedTimestamp' and later from being truncated. Setting + * Timestamp::max() effectively nullifies the pin because no oplog truncation will be stopped by + * it. + */ + virtual void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) = 0; }; } // namespace mongo diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp index 749374487b1..1ea521e8392 100644 --- a/src/mongo/db/storage/storage_engine_impl.cpp +++ b/src/mongo/db/storage/storage_engine_impl.cpp @@ -1127,4 +1127,8 @@ int64_t StorageEngineImpl::sizeOnDiskForDb(OperationContext* opCtx, StringData d return size; } +void StorageEngineImpl::setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) { + _engine->setPinnedOplogTimestamp(pinnedTimestamp); +} + } // namespace mongo diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h index ac2ec4d0c66..b0fc3df31d2 100644 --- a/src/mongo/db/storage/storage_engine_impl.h +++ b/src/mongo/db/storage/storage_engine_impl.h @@ -359,6 +359,8 @@ public: int64_t sizeOnDiskForDb(OperationContext* opCtx, StringData dbName) override; + void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) override; + private: using CollIter = std::list<std::string>::iterator; diff --git a/src/mongo/db/storage/storage_engine_mock.h b/src/mongo/db/storage/storage_engine_mock.h index c893f2f93b6..86a450371ce 100644 --- a/src/mongo/db/storage/storage_engine_mock.h +++ b/src/mongo/db/storage/storage_engine_mock.h @@ -191,6 +191,8 @@ public: const DurableCatalog* getCatalog() const final { return nullptr; } + + void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) final {} }; } // namespace mongo diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index dce7709f497..cffeef5e474 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -641,6 +641,7 @@ WiredTigerKVEngine::WiredTigerKVEngine(const std::string& canonicalName, _inRepairMode(repair), _readOnly(readOnly), _keepDataHistory(serverGlobalParams.enableMajorityReadConcern) { + _pinnedOplogTimestamp.store(Timestamp::max().asULL()); boost::filesystem::path journalPath = path; journalPath /= "journal"; if (_durable) { @@ -2254,28 +2255,30 @@ boost::optional<Timestamp> WiredTigerKVEngine::getOplogNeededForCrashRecovery() } Timestamp WiredTigerKVEngine::getPinnedOplog() const { + // The storage engine may have been told to keep oplog back to a certain timestamp. + Timestamp pinned = Timestamp(_pinnedOplogTimestamp.load()); + { stdx::lock_guard<Latch> lock(_oplogPinnedByBackupMutex); if (!storageGlobalParams.allowOplogTruncation) { // If oplog truncation is not allowed, then return the min timestamp so that no history - // is - // ever allowed to be deleted. + // is ever allowed to be deleted. return Timestamp::min(); } if (_oplogPinnedByBackup) { // All the oplog since `_oplogPinnedByBackup` should remain intact during the backup. - return _oplogPinnedByBackup.get(); + return std::min(_oplogPinnedByBackup.get(), pinned); } } auto oplogNeededForCrashRecovery = getOplogNeededForCrashRecovery(); if (!_keepDataHistory) { // We use rollbackViaRefetch, so we only need to pin oplog for crash recovery. - return oplogNeededForCrashRecovery.value_or(Timestamp::max()); + return std::min((oplogNeededForCrashRecovery.value_or(Timestamp::max())), pinned); } if (oplogNeededForCrashRecovery) { - return oplogNeededForCrashRecovery.value(); + return std::min(oplogNeededForCrashRecovery.value(), pinned); } auto status = getOplogNeededForRollback(); @@ -2287,6 +2290,10 @@ Timestamp WiredTigerKVEngine::getPinnedOplog() const { return Timestamp::min(); } +void WiredTigerKVEngine::setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) { + _pinnedOplogTimestamp.store(pinnedTimestamp.asULL()); +} + bool WiredTigerKVEngine::supportsReadConcernSnapshot() const { return true; } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h index 190fa57143e..e438b37fd95 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h @@ -352,6 +352,8 @@ public: return _clockSource; } + void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) override; + private: class WiredTigerSessionSweeper; class WiredTigerCheckpointThread; @@ -474,5 +476,9 @@ private: mutable Mutex _highestDurableTimestampMutex = MONGO_MAKE_LATCH("WiredTigerKVEngine::_highestDurableTimestampMutex"); mutable unsigned long long _highestSeenDurableTimestamp = StorageEngine::kMinimumTimestamp; + + // Pins the oplog so that OplogStones will not truncate oplog history equal or newer to this + // timestamp. + AtomicWord<std::uint64_t> _pinnedOplogTimestamp; }; } // namespace mongo |