From 02087dc8d18138e9f9d555da21f6105046573ed1 Mon Sep 17 00:00:00 2001
From: Daniel Gottlieb
Date: Wed, 24 Feb 2021 14:40:33 -0500
Subject: SERVER-54760: Guarantee causal relationship between WTOplogManager::waitForAllEarlierOplogWritesToBeVisible and `kAllDurableSnapshot` read source.

---
 src/mongo/db/storage/storage_engine.h              |  7 ++++++
 .../db/storage/wiredtiger/wiredtiger_kv_engine.cpp | 22 +++++++++++++++++-
 .../wiredtiger/wiredtiger_oplog_manager.cpp        | 27 ++--------------------
 .../storage/wiredtiger/wiredtiger_oplog_manager.h  |  4 ----
 4 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'src')

diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h
index eb6f0025313..824a059bf89 100644
--- a/src/mongo/db/storage/storage_engine.h
+++ b/src/mongo/db/storage/storage_engine.h
@@ -59,6 +59,13 @@ struct StorageGlobalParams;
  */
 class StorageEngine {
 public:
+    /**
+     * This is the minimum valid timestamp; it can be used for reads that need to see all
+     * untimestamped data but no timestamped data. We cannot use 0 here because 0 means see all
+     * timestamped data.
+     */
+    static const uint64_t kMinimumTimestamp = 1;
+
     /**
      * When the storage engine needs to know how much oplog to preserve for the sake of active
      * transactions, it executes a callback that returns either the oldest active transaction
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index a015e747028..bbdbfa9666f 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -2035,8 +2035,28 @@ StatusWith WiredTigerKVEngine::recoverToStableTimestamp(OperationCont
     return {stableTimestamp};
 }
 
+namespace {
+uint64_t _fetchAllDurableValue(WT_CONNECTION* conn) {
+    // Fetch the latest all_durable value from the storage engine. This value will be a timestamp
+    // that has no holes (uncommitted transactions with lower timestamps) behind it.
+    char buf[(2 * 8 /*bytes in hex*/) + 1 /*nul terminator*/];
+    auto wtstatus = conn->query_timestamp(conn, buf, "get=all_durable");
+    if (wtstatus == WT_NOTFOUND) {
+        // Treat this as lowest possible timestamp; we need to see all preexisting data but no new
+        // (timestamped) data.
+        return StorageEngine::kMinimumTimestamp;
+    } else {
+        invariantWTOK(wtstatus);
+    }
+
+    uint64_t tmp;
+    fassert(38002, parseNumberFromStringWithBase(buf, 16, &tmp));
+    return tmp;
+}
+}  // namespace
+
 Timestamp WiredTigerKVEngine::getAllDurableTimestamp() const {
-    auto ret = _oplogManager->fetchAllDurableValue(_conn);
+    auto ret = _fetchAllDurableValue(_conn);
 
     stdx::lock_guard lk(_highestDurableTimestampMutex);
     if (ret < _highestSeenDurableTimestamp) {
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
index 788310ab273..63949a1f5ee 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
@@ -42,11 +42,6 @@
 #include "mongo/util/scopeguard.h"
 
 namespace mongo {
-namespace {
-// This is the minimum valid timestamp; it can be used for reads that need to see all untimestamped
-// data but no timestamped data. We cannot use 0 here because 0 means see all timestamped data.
-const uint64_t kMinimumTimestamp = 1;
-}  // namespace
 
 MONGO_FAIL_POINT_DEFINE(WTPausePrimaryOplogDurabilityLoop);
 
@@ -68,7 +63,7 @@ void WiredTigerOplogManager::start(OperationContext* opCtx,
         LOG(1) << "Setting oplog visibility at startup. Val: " << oplogVisibility;
     } else {
         // Avoid setting oplog visibility to 0. That means "everything is visible".
-        setOplogReadTimestamp(Timestamp(kMinimumTimestamp));
+        setOplogReadTimestamp(Timestamp(StorageEngine::kMinimumTimestamp));
     }
 
     // Need to obtain the mutex before starting the thread, as otherwise it may race ahead
@@ -207,7 +202,7 @@ void WiredTigerOplogManager::_oplogJournalThreadLoop(WiredTigerSessionCache* ses
         _opsWaitingForJournal = false;
         lk.unlock();
 
-        const uint64_t newTimestamp = fetchAllDurableValue(sessionCache->conn());
+        const uint64_t newTimestamp = sessionCache->getKVEngine()->getAllDurableTimestamp().asULL();
 
         // The newTimestamp may actually go backward during secondary batch application,
         // where we commit data file changes separately from oplog changes, so ignore
@@ -249,22 +244,4 @@ void WiredTigerOplogManager::_setOplogReadTimestamp(WithLock, uint64_t newTimest
     LOG(2) << "Setting new oplogReadTimestamp: " << Timestamp(newTimestamp);
 }
 
-uint64_t WiredTigerOplogManager::fetchAllDurableValue(WT_CONNECTION* conn) {
-    // Fetch the latest all_durable value from the storage engine. This value will be a timestamp
-    // that has no holes (uncommitted transactions with lower timestamps) behind it.
-    char buf[(2 * 8 /*bytes in hex*/) + 1 /*nul terminator*/];
-    auto wtstatus = conn->query_timestamp(conn, buf, "get=all_durable");
-    if (wtstatus == WT_NOTFOUND) {
-        // Treat this as lowest possible timestamp; we need to see all preexisting data but no new
-        // (timestamped) data.
-        return kMinimumTimestamp;
-    } else {
-        invariantWTOK(wtstatus);
-    }
-
-    uint64_t tmp;
-    fassert(38002, parseNumberFromStringWithBase(buf, 16, &tmp));
-    return tmp;
-}
-
 }  // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h
index 3ba93550a52..21f4f22e03e 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h
@@ -78,10 +78,6 @@ public:
     void waitForAllEarlierOplogWritesToBeVisible(const WiredTigerRecordStore* oplogRecordStore,
                                                  OperationContext* opCtx);
 
-    // Returns the all_durable timestamp. All transactions with timestamps earlier than the
-    // all_durable timestamp are committed.
-    uint64_t fetchAllDurableValue(WT_CONNECTION* conn);
-
 private:
     void _oplogJournalThreadLoop(WiredTigerSessionCache* sessionCache,
                                  WiredTigerRecordStore* oplogRecordStore);
-- 
cgit v1.2.1