diff options
Diffstat (limited to 'src/mongo/db/storage/wiredtiger')
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp | 24 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h | 6 |
2 files changed, 22 insertions, 8 deletions
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp index 42158b8dabc..63c5f4f66e1 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp @@ -39,6 +39,7 @@ #include "mongo/stdx/mutex.h" #include "mongo/util/concurrency/idle_thread_block.h" #include "mongo/util/log.h" +#include "mongo/util/scopeguard.h" namespace mongo { namespace { @@ -100,7 +101,7 @@ void WiredTigerOplogManager::halt() { } void WiredTigerOplogManager::waitForAllEarlierOplogWritesToBeVisible( - const WiredTigerRecordStore* oplogRecordStore, OperationContext* opCtx) const { + const WiredTigerRecordStore* oplogRecordStore, OperationContext* opCtx) { invariant(opCtx->lockState()->isNoop() || !opCtx->lockState()->inAWriteUnitOfWork()); // In order to reliably detect rollback situations, we need to fetch the latestVisibleTimestamp @@ -122,6 +123,12 @@ void WiredTigerOplogManager::waitForAllEarlierOplogWritesToBeVisible( opCtx->recoveryUnit()->abandonSnapshot(); stdx::unique_lock<stdx::mutex> lk(_oplogVisibilityStateMutex); + + // Prevent any scheduled journal flushes from being delayed and blocking this wait excessively. + _opsWaitingForVisibility++; + invariant(_opsWaitingForVisibility > 0); + auto exitGuard = MakeGuard([&] { _opsWaitingForVisibility--; }); + opCtx->waitForConditionOrInterrupt(_opsBecameVisibleCV, lk, [&] { auto newLatestVisibleTimestamp = getOplogReadTimestamp(); if (newLatestVisibleTimestamp < currentLatestVisibleTimestamp) { @@ -177,15 +184,18 @@ void WiredTigerOplogManager::_oplogJournalThreadLoop( auto now = Date_t::now(); auto deadline = now + journalDelay; auto shouldSyncOpsWaitingForJournal = [&] { - return _shuttingDown || oplogRecordStore->haveCappedWaiters(); + return _shuttingDown || _opsWaitingForVisibility || + oplogRecordStore->haveCappedWaiters(); }; // Eventually it would be more optimal to merge this with the normal journal flushing - // and block for oplog tailers to show up. For now this loop will poll once a - // millisecond up to the journalDelay to see if we have any waiters yet. This reduces - // sync-related I/O on the primary when secondaries are lagged, but will avoid - // significant delays in confirming majority writes on replica sets with infrequent - // writes. + // and block for either oplog tailers or operations waiting for oplog visibility. For + // now this loop will poll once a millisecond up to the journalDelay to see if we have + // any waiters yet. This reduces sync-related I/O on the primary when secondaries are + // lagged, but will avoid significant delays in confirming majority writes on replica + // sets with infrequent writes. + // Callers of waitForAllEarlierOplogWritesToBeVisible() like causally consistent reads + // will preempt this delay. while (now < deadline && !_opsWaitingForJournalCV.wait_until( lk, now.toSystemTimePoint(), shouldSyncOpsWaitingForJournal)) { diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h index 435b6b31f20..819faf80907 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h @@ -76,7 +76,7 @@ public: // Waits until all committed writes at this point to become visible (that is, no holes exist in // the oplog.) void waitForAllEarlierOplogWritesToBeVisible(const WiredTigerRecordStore* oplogRecordStore, - OperationContext* opCtx) const; + OperationContext* opCtx); // Returns the all committed timestamp. All transactions with timestamps earlier than the // all committed timestamp are committed. @@ -103,6 +103,10 @@ private: RecordId _oplogMaxAtStartup = RecordId(0); // Guarded by oplogVisibilityStateMutex. bool _opsWaitingForJournal = false; // Guarded by oplogVisibilityStateMutex. + // When greater than 0, indicates that there are operations waiting for oplog visibility, and + // journal flushing should not be delayed. + std::int64_t _opsWaitingForVisibility = 0; // Guarded by oplogVisibilityStateMutex. + AtomicUInt64 _oplogReadTimestamp; }; } // namespace mongo |