summary refs log tree commit diff
path: root/src/mongo/db/storage/wiredtiger
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/storage/wiredtiger')
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp 24
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h 6
2 files changed, 22 insertions, 8 deletions
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
index 42158b8dabc..63c5f4f66e1 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
@@ -39,6 +39,7 @@
#include "mongo/stdx/mutex.h"
#include "mongo/util/concurrency/idle_thread_block.h"
#include "mongo/util/log.h"
+#include "mongo/util/scopeguard.h"
namespace mongo {
namespace {
@@ -100,7 +101,7 @@ void WiredTigerOplogManager::halt() {
}
void WiredTigerOplogManager::waitForAllEarlierOplogWritesToBeVisible(
- const WiredTigerRecordStore* oplogRecordStore, OperationContext* opCtx) const {
+ const WiredTigerRecordStore* oplogRecordStore, OperationContext* opCtx) {
invariant(opCtx->lockState()->isNoop() || !opCtx->lockState()->inAWriteUnitOfWork());
// In order to reliably detect rollback situations, we need to fetch the latestVisibleTimestamp
@@ -122,6 +123,12 @@ void WiredTigerOplogManager::waitForAllEarlierOplogWritesToBeVisible(
opCtx->recoveryUnit()->abandonSnapshot();
stdx::unique_lock<stdx::mutex> lk(_oplogVisibilityStateMutex);
+
+ // Prevent any scheduled journal flushes from being delayed and blocking this wait excessively.
+ _opsWaitingForVisibility++;
+ invariant(_opsWaitingForVisibility > 0);
+ auto exitGuard = MakeGuard([&] { _opsWaitingForVisibility--; });
+
opCtx->waitForConditionOrInterrupt(_opsBecameVisibleCV, lk, [&] {
auto newLatestVisibleTimestamp = getOplogReadTimestamp();
if (newLatestVisibleTimestamp < currentLatestVisibleTimestamp) {
@@ -177,15 +184,18 @@ void WiredTigerOplogManager::_oplogJournalThreadLoop(
auto now = Date_t::now();
auto deadline = now + journalDelay;
auto shouldSyncOpsWaitingForJournal = [&] {
- return _shuttingDown || oplogRecordStore->haveCappedWaiters();
+ return _shuttingDown || _opsWaitingForVisibility ||
+ oplogRecordStore->haveCappedWaiters();
};
// Eventually it would be more optimal to merge this with the normal journal flushing
- // and block for oplog tailers to show up. For now this loop will poll once a
- // millisecond up to the journalDelay to see if we have any waiters yet. This reduces
- // sync-related I/O on the primary when secondaries are lagged, but will avoid
- // significant delays in confirming majority writes on replica sets with infrequent
- // writes.
+ // and block for either oplog tailers or operations waiting for oplog visibility. For
+ // now this loop will poll once a millisecond up to the journalDelay to see if we have
+ // any waiters yet. This reduces sync-related I/O on the primary when secondaries are
+ // lagged, but will avoid significant delays in confirming majority writes on replica
+ // sets with infrequent writes.
+ // Callers of waitForAllEarlierOplogWritesToBeVisible() like causally consistent reads
+ // will preempt this delay.
while (now < deadline &&
!_opsWaitingForJournalCV.wait_until(
lk, now.toSystemTimePoint(), shouldSyncOpsWaitingForJournal)) {
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h
index 435b6b31f20..819faf80907 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h
@@ -76,7 +76,7 @@ public:
// Waits until all writes committed at this point become visible (that is, until no holes exist in
// the oplog).
void waitForAllEarlierOplogWritesToBeVisible(const WiredTigerRecordStore* oplogRecordStore,
- OperationContext* opCtx) const;
+ OperationContext* opCtx);
// Returns the all committed timestamp. All transactions with timestamps earlier than the
// all committed timestamp are committed.
@@ -103,6 +103,10 @@ private:
RecordId _oplogMaxAtStartup = RecordId(0); // Guarded by oplogVisibilityStateMutex.
bool _opsWaitingForJournal = false; // Guarded by oplogVisibilityStateMutex.
+ // When greater than 0, indicates that there are operations waiting for oplog visibility, and
+ // journal flushing should not be delayed.
+ std::int64_t _opsWaitingForVisibility = 0; // Guarded by oplogVisibilityStateMutex.
+
AtomicUInt64 _oplogReadTimestamp;
};
} // namespace mongo