author     Misha Tyulenev <misha@mongodb.com>   2017-11-17 17:00:15 -0500
committer  Misha Tyulenev <misha@mongodb.com>   2017-11-17 17:00:30 -0500
commit     a40d277a1c7a735e4d7ed5cf394e23181f8620fb (patch)
tree       5fee3fc10036972f933d71c977e1cd4d89efe605 /src/mongo
parent     97af8701b538754261e566b26fa22cb4b54710f3 (diff)
download   mongo-a40d277a1c7a735e4d7ed5cf394e23181f8620fb.tar.gz
SERVER-32006 add a parameter to wait before secondaries perform noop write
Diffstat (limited to 'src/mongo')
-rw-r--r--  src/mongo/db/read_concern.cpp                        16
-rw-r--r--  src/mongo/db/repl/replication_coordinator.h          10
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp   32
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.h     17
-rw-r--r--  src/mongo/db/repl/replication_coordinator_mock.cpp    5
-rw-r--r--  src/mongo/db/repl/replication_coordinator_mock.h      3
6 files changed, 73 insertions, 10 deletions
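
In outline, the change makes a lagging secondary wait briefly for replication to catch up before it schedules a noop write (`appendOplogNote`) on the primary, with the wait bounded by the new `waitForSecondaryBeforeNoopWriteMS` server parameter (default 10 ms). The following is a condensed sketch of the new logic in `makeNoopWriteIfNeeded()` from the first hunk below, not the patch itself; `makeNoopWriteIfNeededSketch` and `scheduleNoopWriteSketch` are placeholder names, and the pre-existing retry loop is collapsed into the placeholder.

```cpp
#include "mongo/base/status.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/read_concern_args.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/util/time_support.h"

namespace mongo {

// Placeholder for the pre-existing appendOplogNote retry loop, which the patch leaves unchanged.
Status scheduleNoopWriteSketch(OperationContext* opCtx, LogicalTime clusterTime);

Status makeNoopWriteIfNeededSketch(OperationContext* opCtx, LogicalTime clusterTime) {
    auto replCoord = repl::ReplicationCoordinator::get(opCtx);
    auto lastApplied = LogicalTime(replCoord->getMyLastAppliedOpTime().getTimestamp());

    // New in this patch: a secondary that is merely lagging first waits up to
    // waitForSecondaryBeforeNoopWriteMS (the new server parameter, default 10 ms) instead of
    // immediately triggering a noop write on the primary.
    if (clusterTime > lastApplied && replCoord->getMemberState().secondary()) {
        auto deadline = Date_t::now() + Milliseconds(10);  // the patch reads the parameter here
        auto readConcernArgs =
            repl::ReadConcernArgs(clusterTime, repl::ReadConcernLevel::kLocalReadConcern);
        // Returns at the deadline whether or not clusterTime was reached.
        auto waitStatus = replCoord->waitUntilOpTimeForReadUntil(opCtx, readConcernArgs, deadline);
        if (!waitStatus.isOK()) {
            // The patch only logs this at debug level (LOG(1)); a failed wait is not fatal.
        }
        lastApplied = LogicalTime(replCoord->getMyLastAppliedOpTime().getTimestamp());
    }

    // Unchanged: if this node is still behind clusterTime, schedule the noop write on the primary.
    if (clusterTime > lastApplied) {
        return scheduleNoopWriteSketch(opCtx, clusterTime);
    }
    return Status::OK();
}

}  // namespace mongo
```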
diff --git a/src/mongo/db/read_concern.cpp b/src/mongo/db/read_concern.cpp
index 557c83c5f6a..4a11f36908f 100644
--- a/src/mongo/db/read_concern.cpp
+++ b/src/mongo/db/read_concern.cpp
@@ -102,6 +102,8 @@ private:
 };
 
+MONGO_EXPORT_SERVER_PARAMETER(waitForSecondaryBeforeNoopWriteMS, int, 10);
+
 /**
  * Schedule a write via appendOplogNote command to the primary of this replica set.
  */
@@ -112,6 +114,20 @@ Status makeNoopWriteIfNeeded(OperationContext* opCtx, LogicalTime clusterTime) {
     auto& writeRequests = getWriteRequestsSynchronizer(opCtx->getClient()->getServiceContext());
 
     auto lastAppliedOpTime = LogicalTime(replCoord->getMyLastAppliedOpTime().getTimestamp());
+
+    // secondaries may lag primary so wait first to avoid unnecessary noop writes.
+    if (clusterTime > lastAppliedOpTime && replCoord->getMemberState().secondary()) {
+        auto deadline = Date_t::now() + Milliseconds(waitForSecondaryBeforeNoopWriteMS.load());
+        auto readConcernArgs =
+            repl::ReadConcernArgs(clusterTime, repl::ReadConcernLevel::kLocalReadConcern);
+        auto waitStatus = replCoord->waitUntilOpTimeForReadUntil(opCtx, readConcernArgs, deadline);
+        lastAppliedOpTime = LogicalTime(replCoord->getMyLastAppliedOpTime().getTimestamp());
+        if (!waitStatus.isOK()) {
+            LOG(1) << "Wait for clusterTime: " << clusterTime.toString()
+                   << " until deadline: " << deadline << " failed with " << waitStatus.toString();
+        }
+    }
+
     auto status = Status::OK();
     int remainingAttempts = 3;
     // this loop addresses the case when two or more threads need to advance the opLog time but the
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index e02a4efffae..403792a0f99 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -376,6 +376,16 @@ public:
                                           const ReadConcernArgs& settings) = 0;
 
     /**
+     * Waits until the deadline or until the optime of the current node is at least the opTime
+     * specified in 'settings'.
+     *
+     * Returns whether the wait was successful.
+     */
+    virtual Status waitUntilOpTimeForReadUntil(OperationContext* opCtx,
+                                               const ReadConcernArgs& settings,
+                                               boost::optional<Date_t> deadline) = 0;
+
+    /**
      * Retrieves and returns the current election id, which is a unique id that is local to
      * this node and changes every time we become primary.
      * TODO(spencer): Use term instead.
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 27a10f50e1b..f50edc09f3c 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1176,6 +1176,12 @@ Status ReplicationCoordinatorImpl::waitUntilOpTimeForRead(OperationContext* opCt
         return Status::OK();
     }
 
+    return waitUntilOpTimeForReadUntil(opCtx, readConcern, boost::none);
+}
+
+Status ReplicationCoordinatorImpl::waitUntilOpTimeForReadUntil(OperationContext* opCtx,
+                                                               const ReadConcernArgs& readConcern,
+                                                               boost::optional<Date_t> deadline) {
     if (getReplicationMode() != repl::ReplicationCoordinator::modeReplSet) {
         // For master/slave and standalone nodes, readAfterOpTime is not supported, so we return an
         // error. However, we consider all writes "committed" and can treat MajorityReadConcern as
@@ -1185,7 +1191,7 @@ Status ReplicationCoordinatorImpl::waitUntilOpTimeForRead(OperationContext* opCt
     }
 
     if (readConcern.getArgsClusterTime()) {
-        return _waitUntilClusterTimeForRead(opCtx, readConcern);
+        return _waitUntilClusterTimeForRead(opCtx, readConcern, deadline);
     } else {
         return _waitUntilOpTimeForReadDeprecated(opCtx, readConcern);
     }
@@ -1193,7 +1199,8 @@ Status ReplicationCoordinatorImpl::waitUntilOpTimeForRead(OperationContext* opCt
 
 Status ReplicationCoordinatorImpl::_waitUntilOpTime(OperationContext* opCtx,
                                                     bool isMajorityReadConcern,
-                                                    OpTime targetOpTime) {
+                                                    OpTime targetOpTime,
+                                                    boost::optional<Date_t> deadline) {
     if (!isMajorityReadConcern) {
         // This assumes the read concern is "local" level.
         // We need to wait for all committed writes to be visible, even in the oplog (which uses
@@ -1245,7 +1252,19 @@ Status ReplicationCoordinatorImpl::_waitUntilOpTime(OperationContext* opCtx,
         LOG(3) << "waitUntilOpTime: OpID " << opCtx->getOpID() << " is waiting for OpTime "
                << waiter << " until " << opCtx->getDeadline();
 
-        auto waitStatus = opCtx->waitForConditionOrInterruptNoAssert(condVar, lock);
+        auto waitStatus = Status::OK();
+        if (deadline) {
+            auto waitUntilStatus =
+                opCtx->waitForConditionOrInterruptNoAssertUntil(condVar, lock, *deadline);
+            if (!waitUntilStatus.isOK()) {
+                waitStatus = waitUntilStatus.getStatus();
+            }
+            // If deadline is set no need to wait until the targetTime time is reached.
+            return waitStatus;
+        } else {
+            waitStatus = opCtx->waitForConditionOrInterruptNoAssert(condVar, lock);
+        }
+
         if (!waitStatus.isOK()) {
             return waitStatus;
         }
@@ -1254,8 +1273,9 @@ Status ReplicationCoordinatorImpl::_waitUntilOpTime(OperationContext* opCtx,
     return Status::OK();
 }
 
-Status ReplicationCoordinatorImpl::_waitUntilClusterTimeForRead(
-    OperationContext* opCtx, const ReadConcernArgs& readConcern) {
+Status ReplicationCoordinatorImpl::_waitUntilClusterTimeForRead(OperationContext* opCtx,
+                                                                const ReadConcernArgs& readConcern,
+                                                                boost::optional<Date_t> deadline) {
     auto clusterTime = *readConcern.getArgsClusterTime();
     invariant(clusterTime != LogicalTime::kUninitialized);
 
@@ -1267,7 +1287,7 @@ Status ReplicationCoordinatorImpl::_waitUntilClusterTimeForRead(
     const bool isMajorityReadConcern =
         readConcern.getLevel() == ReadConcernLevel::kMajorityReadConcern;
 
-    return _waitUntilOpTime(opCtx, isMajorityReadConcern, targetOpTime);
+    return _waitUntilOpTime(opCtx, isMajorityReadConcern, targetOpTime, deadline);
 }
 
 // TODO: remove when SERVER-29729 is done
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 69b57830d69..ff6053ce5a7 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -167,6 +167,10 @@ public:
     virtual OpTime getMyLastAppliedOpTime() const override;
     virtual OpTime getMyLastDurableOpTime() const override;
 
+    virtual Status waitUntilOpTimeForReadUntil(OperationContext* opCtx,
+                                               const ReadConcernArgs& readConcern,
+                                               boost::optional<Date_t> deadline) override;
+
     virtual Status waitUntilOpTimeForRead(OperationContext* opCtx,
                                           const ReadConcernArgs& readConcern) override;
 
@@ -1111,7 +1115,10 @@ private:
     /**
     * Waits until the optime of the current node is at least the 'opTime'.
     */
-    Status _waitUntilOpTime(OperationContext* opCtx, bool isMajorityReadConcern, OpTime opTime);
+    Status _waitUntilOpTime(OperationContext* opCtx,
+                            bool isMajorityReadConcern,
+                            OpTime opTime,
+                            boost::optional<Date_t> deadline = boost::none);
 
     /**
     * Waits until the optime of the current node is at least the opTime specified in 'readConcern'.
@@ -1122,11 +1129,13 @@ private:
                                               const ReadConcernArgs& readConcern);
 
     /**
-     * Waits until the optime of the current node is at least the clusterTime specified in
-     * 'readConcern'. Supports local and majority readConcern.
+     * Waits until the deadline or until the optime of the current node is at least the clusterTime
+     * specified in 'readConcern'. Supports local and majority readConcern.
+     * If maxTimeMS and deadline are both specified, it waits for min(maxTimeMS, deadline).
      */
     Status _waitUntilClusterTimeForRead(OperationContext* opCtx,
-                                        const ReadConcernArgs& readConcern);
+                                        const ReadConcernArgs& readConcern,
+                                        boost::optional<Date_t> deadline);
 
     /**
      * Returns a pseudorandom number no less than 0 and less than limit (which must be positive).
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index 1db09263d55..58a3efd664d 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -229,6 +229,11 @@ Status ReplicationCoordinatorMock::waitUntilOpTimeForRead(OperationContext* opCt
     return Status::OK();
 }
 
+Status ReplicationCoordinatorMock::waitUntilOpTimeForReadUntil(OperationContext* opCtx,
+                                                               const ReadConcernArgs& settings,
+                                                               boost::optional<Date_t> deadline) {
+    return Status::OK();
+}
 
 OID ReplicationCoordinatorMock::getElectionId() {
     // TODO
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 4472247de44..12bf54c1f6c 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -132,6 +132,9 @@ public:
     virtual Status waitUntilOpTimeForRead(OperationContext* opCtx,
                                           const ReadConcernArgs& settings) override;
 
+    virtual Status waitUntilOpTimeForReadUntil(OperationContext* opCtx,
+                                               const ReadConcernArgs& settings,
+                                               boost::optional<Date_t> deadline) override;
     virtual OID getElectionId();
 
     virtual OID getMyRID() const;
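
Beyond read_concern.cpp, the public surface of the change is the new deadline-aware wait on `ReplicationCoordinator`. Below is a minimal usage sketch, assuming an `OperationContext*` and a `LogicalTime` are already in hand; the caller name and the 50 ms budget are illustrative only, not part of the patch.

```cpp
#include "mongo/base/status.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/read_concern_args.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/util/time_support.h"

namespace mongo {

// Hypothetical call site; not part of the patch.
Status waitForClusterTimeBounded(OperationContext* opCtx, LogicalTime clusterTime) {
    auto replCoord = repl::ReplicationCoordinator::get(opCtx);
    auto readConcern =
        repl::ReadConcernArgs(clusterTime, repl::ReadConcernLevel::kLocalReadConcern);

    // With a concrete deadline, _waitUntilOpTime() waits on its condition variable via
    // waitForConditionOrInterruptNoAssertUntil() and returns once the deadline passes, even if
    // this node has not yet applied up to clusterTime, so callers should re-check lastApplied
    // afterwards (as makeNoopWriteIfNeeded() does). Passing boost::none instead gives the old
    // unbounded behavior; waitUntilOpTimeForRead() now simply forwards with boost::none.
    auto deadline = Date_t::now() + Milliseconds(50);
    return replCoord->waitUntilOpTimeForReadUntil(opCtx, readConcern, deadline);
}

}  // namespace mongo
```

Defaulting the new `deadline` argument of the private `_waitUntilOpTime()` to `boost::none` keeps all pre-existing callers source-compatible.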