summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dianna Hohensee <dianna.hohensee@mongodb.com> 2021-04-26 22:31:27 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-06-03 14:42:22 +0000
commit: b5c352d4ae62d8078ac4f59422f87bd8633180de (patch)
tree: 8a12dcf4413f08770eb1e10140699437f72c4db9
parent: 413af9c82279f4d6dbf802534674789cbcefae59 (diff)
download: mongo-b5c352d4ae62d8078ac4f59422f87bd8633180de.tar.gz
SERVER-54005 Prevent oplog history truncation from deleting entries lte to the oplogTruncateAfterPoint
(cherry picked from commit c4065661e5c9c57e8d724243f5f07db72a827090)
-rw-r--r-- src/mongo/db/repl/replication_consistency_markers_impl.cpp 15
-rw-r--r-- src/mongo/db/repl/storage_interface.h 8
-rw-r--r-- src/mongo/db/repl/storage_interface_impl.cpp 5
-rw-r--r-- src/mongo/db/repl/storage_interface_impl.h 3
-rw-r--r-- src/mongo/db/repl/storage_interface_mock.h 3
-rw-r--r-- src/mongo/db/storage/biggie/biggie_kv_engine.h 2
-rw-r--r-- src/mongo/db/storage/devnull/devnull_kv_engine.h 2
-rw-r--r-- src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h 2
-rw-r--r-- src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp 2
-rw-r--r-- src/mongo/db/storage/kv/kv_engine.h 5
-rw-r--r-- src/mongo/db/storage/storage_engine.h 7
-rw-r--r-- src/mongo/db/storage/storage_engine_impl.cpp 4
-rw-r--r-- src/mongo/db/storage/storage_engine_impl.h 2
-rw-r--r-- src/mongo/db/storage/storage_engine_mock.h 2
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp 17
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h 6
16 files changed, 80 insertions, 5 deletions
diff --git a/src/mongo/db/repl/replication_consistency_markers_impl.cpp b/src/mongo/db/repl/replication_consistency_markers_impl.cpp
index 84c1fd78e47..df8c3ebdd6d 100644
--- a/src/mongo/db/repl/replication_consistency_markers_impl.cpp
+++ b/src/mongo/db/repl/replication_consistency_markers_impl.cpp
@@ -395,6 +395,15 @@ void ReplicationConsistencyMarkersImpl::setOplogTruncateAfterPoint(OperationCont
const Timestamp& timestamp) {
fassert(40512, _setOplogTruncateAfterPoint(opCtx, timestamp));
+ if (timestamp != Timestamp::min()) {
+ // Update the oplog pin so we don't delete oplog history past the oplogTruncateAfterPoint.
+ _storageInterface->setPinnedOplogTimestamp(opCtx, timestamp);
+ } else {
+ // Set Timestamp::max() to nullify the pin, rather than pinning all oplog history with a
+ // Timestamp::min().
+ _storageInterface->setPinnedOplogTimestamp(opCtx, Timestamp::max());
+ }
+
// If the oplogTruncateAfterPoint is manually reset via this function, then we need to clear the
// cached last no-holes oplog entry. This is important so that
// refreshOplogTruncateAfterPointIfPrimary always returns the latest oplog entry without
@@ -546,6 +555,12 @@ ReplicationConsistencyMarkersImpl::refreshOplogTruncateAfterPointIfPrimary(
4455501,
OpTimeAndWallTime::parseOpTimeAndWallTimeFromOplogEntry(truncateOplogEntryBSON.get()));
+ // Pass _lastNoHolesOplogTimestamp down to the storage layer to prevent oplog history less than
+ // or equal to the oplogTruncateAfterPoint from being entirely deleted. There should always be a
+ // single oplog entry less than or equal to the oplogTruncateAfterPoint. Otherwise there will
+ // not be a valid oplog entry with which to update the caller.
+ _storageInterface->setPinnedOplogTimestamp(opCtx, _lastNoHolesOplogTimestamp.get());
+
return _lastNoHolesOplogOpTimeAndWallTime;
}
diff --git a/src/mongo/db/repl/storage_interface.h b/src/mongo/db/repl/storage_interface.h
index ed7c42d0002..5b6f8ec07ad 100644
--- a/src/mongo/db/repl/storage_interface.h
+++ b/src/mongo/db/repl/storage_interface.h
@@ -473,6 +473,14 @@ public:
* Returns the read timestamp of the recovery unit of the given operation context.
*/
virtual Timestamp getPointInTimeReadTimestamp(OperationContext* opCtx) const = 0;
+
+ /**
+ * Prevents oplog history at 'pinnedTimestamp' and later from being truncated. Setting
+ * Timestamp::max() effectively nullifies the pin because no oplog truncation will be stopped by
+ * it.
+ */
+ virtual void setPinnedOplogTimestamp(OperationContext* opCtx,
+ const Timestamp& pinnedTimestamp) const = 0;
};
} // namespace repl
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index bfe6df21dd1..329a0ee03a0 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -1358,5 +1358,10 @@ Timestamp StorageInterfaceImpl::getPointInTimeReadTimestamp(OperationContext* op
return *readTimestamp;
}
+void StorageInterfaceImpl::setPinnedOplogTimestamp(OperationContext* opCtx,
+ const Timestamp& pinnedTimestamp) const {
+ opCtx->getServiceContext()->getStorageEngine()->setPinnedOplogTimestamp(pinnedTimestamp);
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/storage_interface_impl.h b/src/mongo/db/repl/storage_interface_impl.h
index 6e47c07071e..c9d4b7749fc 100644
--- a/src/mongo/db/repl/storage_interface_impl.h
+++ b/src/mongo/db/repl/storage_interface_impl.h
@@ -200,6 +200,9 @@ public:
Timestamp getPointInTimeReadTimestamp(OperationContext* opCtx) const override;
+ void setPinnedOplogTimestamp(OperationContext* opCtx,
+ const Timestamp& pinnedTimestamp) const override;
+
private:
const NamespaceString _rollbackIdNss;
};
diff --git a/src/mongo/db/repl/storage_interface_mock.h b/src/mongo/db/repl/storage_interface_mock.h
index 806266662f5..11bb6213690 100644
--- a/src/mongo/db/repl/storage_interface_mock.h
+++ b/src/mongo/db/repl/storage_interface_mock.h
@@ -354,6 +354,9 @@ public:
return {};
}
+ void setPinnedOplogTimestamp(OperationContext* opCtx,
+ const Timestamp& pinnedTimestamp) const override {}
+
// Testing functions.
CreateCollectionForBulkFn createCollectionForBulkFn =
[](const NamespaceString& nss,
diff --git a/src/mongo/db/storage/biggie/biggie_kv_engine.h b/src/mongo/db/storage/biggie/biggie_kv_engine.h
index 2ce35d07cbf..6d117b71521 100644
--- a/src/mongo/db/storage/biggie/biggie_kv_engine.h
+++ b/src/mongo/db/storage/biggie/biggie_kv_engine.h
@@ -164,6 +164,8 @@ public:
*/
bool trySwapMaster(StringStore& newMaster, uint64_t version);
+ virtual void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) {}
+
private:
std::shared_ptr<void> _catalogInfo;
int _cachePressureForTest = 0;
diff --git a/src/mongo/db/storage/devnull/devnull_kv_engine.h b/src/mongo/db/storage/devnull/devnull_kv_engine.h
index 3f73b4e4ade..f038fb6a6fa 100644
--- a/src/mongo/db/storage/devnull/devnull_kv_engine.h
+++ b/src/mongo/db/storage/devnull/devnull_kv_engine.h
@@ -151,6 +151,8 @@ public:
return boost::none;
}
+ virtual void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) {}
+
private:
std::shared_ptr<void> _catalogInfo;
diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h
index f505ee48655..efa6c5fd467 100644
--- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h
+++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h
@@ -123,6 +123,8 @@ public:
return boost::none;
}
+ void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) {}
+
private:
typedef StringMap<std::shared_ptr<void>> DataMap;
diff --git a/src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp b/src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp
index 7f187993d7a..327d871ea98 100644
--- a/src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp
+++ b/src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp
@@ -128,6 +128,8 @@ public:
return boost::none;
}
+ void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) {}
+
// List of idents removed using dropIdent().
std::vector<std::string> droppedIdents;
diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h
index 73ba5f004c6..a4621f635f0 100644
--- a/src/mongo/db/storage/kv/kv_engine.h
+++ b/src/mongo/db/storage/kv/kv_engine.h
@@ -476,6 +476,11 @@ public:
}
/**
+ * See `StorageEngine::setPinnedOplogTimestamp`
+ */
+ virtual void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) = 0;
+
+ /**
* The destructor will never be called from mongod, but may be called from tests.
* Engines may assume that this will only be called in the case of clean shutdown, even if
* cleanShutdown() hasn't been called.
diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h
index ffb4d9ebfc0..dc9db28e145 100644
--- a/src/mongo/db/storage/storage_engine.h
+++ b/src/mongo/db/storage/storage_engine.h
@@ -634,6 +634,13 @@ public:
virtual const KVEngine* getEngine() const = 0;
virtual DurableCatalog* getCatalog() = 0;
virtual const DurableCatalog* getCatalog() const = 0;
+
+ /**
+ * Prevents oplog history at 'pinnedTimestamp' and later from being truncated. Setting
+ * Timestamp::max() effectively nullifies the pin because no oplog truncation will be stopped by
+ * it.
+ */
+ virtual void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) = 0;
};
} // namespace mongo
diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp
index 749374487b1..1ea521e8392 100644
--- a/src/mongo/db/storage/storage_engine_impl.cpp
+++ b/src/mongo/db/storage/storage_engine_impl.cpp
@@ -1127,4 +1127,8 @@ int64_t StorageEngineImpl::sizeOnDiskForDb(OperationContext* opCtx, StringData d
return size;
}
+void StorageEngineImpl::setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) {
+ _engine->setPinnedOplogTimestamp(pinnedTimestamp);
+}
+
} // namespace mongo
diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h
index ac2ec4d0c66..b0fc3df31d2 100644
--- a/src/mongo/db/storage/storage_engine_impl.h
+++ b/src/mongo/db/storage/storage_engine_impl.h
@@ -359,6 +359,8 @@ public:
int64_t sizeOnDiskForDb(OperationContext* opCtx, StringData dbName) override;
+ void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) override;
+
private:
using CollIter = std::list<std::string>::iterator;
diff --git a/src/mongo/db/storage/storage_engine_mock.h b/src/mongo/db/storage/storage_engine_mock.h
index c893f2f93b6..86a450371ce 100644
--- a/src/mongo/db/storage/storage_engine_mock.h
+++ b/src/mongo/db/storage/storage_engine_mock.h
@@ -191,6 +191,8 @@ public:
const DurableCatalog* getCatalog() const final {
return nullptr;
}
+
+ void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) final {}
};
} // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index dce7709f497..cffeef5e474 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -641,6 +641,7 @@ WiredTigerKVEngine::WiredTigerKVEngine(const std::string& canonicalName,
_inRepairMode(repair),
_readOnly(readOnly),
_keepDataHistory(serverGlobalParams.enableMajorityReadConcern) {
+ _pinnedOplogTimestamp.store(Timestamp::max().asULL());
boost::filesystem::path journalPath = path;
journalPath /= "journal";
if (_durable) {
@@ -2254,28 +2255,30 @@ boost::optional<Timestamp> WiredTigerKVEngine::getOplogNeededForCrashRecovery()
}
Timestamp WiredTigerKVEngine::getPinnedOplog() const {
+ // The storage engine may have been told to keep oplog back to a certain timestamp.
+ Timestamp pinned = Timestamp(_pinnedOplogTimestamp.load());
+
{
stdx::lock_guard<Latch> lock(_oplogPinnedByBackupMutex);
if (!storageGlobalParams.allowOplogTruncation) {
// If oplog truncation is not allowed, then return the min timestamp so that no history
- // is
- // ever allowed to be deleted.
+ // is ever allowed to be deleted.
return Timestamp::min();
}
if (_oplogPinnedByBackup) {
// All the oplog since `_oplogPinnedByBackup` should remain intact during the backup.
- return _oplogPinnedByBackup.get();
+ return std::min(_oplogPinnedByBackup.get(), pinned);
}
}
auto oplogNeededForCrashRecovery = getOplogNeededForCrashRecovery();
if (!_keepDataHistory) {
// We use rollbackViaRefetch, so we only need to pin oplog for crash recovery.
- return oplogNeededForCrashRecovery.value_or(Timestamp::max());
+ return std::min((oplogNeededForCrashRecovery.value_or(Timestamp::max())), pinned);
}
if (oplogNeededForCrashRecovery) {
- return oplogNeededForCrashRecovery.value();
+ return std::min(oplogNeededForCrashRecovery.value(), pinned);
}
auto status = getOplogNeededForRollback();
@@ -2287,6 +2290,10 @@ Timestamp WiredTigerKVEngine::getPinnedOplog() const {
return Timestamp::min();
}
+void WiredTigerKVEngine::setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) {
+ _pinnedOplogTimestamp.store(pinnedTimestamp.asULL());
+}
+
bool WiredTigerKVEngine::supportsReadConcernSnapshot() const {
return true;
}
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
index 190fa57143e..e438b37fd95 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
@@ -352,6 +352,8 @@ public:
return _clockSource;
}
+ void setPinnedOplogTimestamp(const Timestamp& pinnedTimestamp) override;
+
private:
class WiredTigerSessionSweeper;
class WiredTigerCheckpointThread;
@@ -474,5 +476,9 @@ private:
mutable Mutex _highestDurableTimestampMutex =
MONGO_MAKE_LATCH("WiredTigerKVEngine::_highestDurableTimestampMutex");
mutable unsigned long long _highestSeenDurableTimestamp = StorageEngine::kMinimumTimestamp;
+
+ // Pins the oplog so that OplogStones will not truncate oplog history equal to or newer than
+ // this timestamp.
+ AtomicWord<std::uint64_t> _pinnedOplogTimestamp;
};
} // namespace mongo