author    Misha Tyulenev <misha@mongodb.com>    2017-11-17 17:00:15 -0500
committer Misha Tyulenev <misha@mongodb.com>    2017-11-17 17:32:53 -0500
commit    b995bacfcf036273fa186c42bb2895679c5af8ba (patch)
tree      297792bba4f65d1950e64e07e13029f215fceff3
parent    a0821b653d4beb879261b9232c66b95383dc86c6 (diff)
download  mongo-b995bacfcf036273fa186c42bb2895679c5af8ba.tar.gz
SERVER-32006 add a parameter to wait before secondaries perform noop write
(cherry picked from commit a40d277a1c7a735e4d7ed5cf394e23181f8620fb)
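
The new waitForSecondaryBeforeNoopWriteMS server parameter (default 10, see the read_concern.cpp hunk below) bounds how long a lagging secondary waits for replication to reach the requested clusterTime before falling back to scheduling a noop write on the primary. Since it is registered via MONGO_EXPORT_SERVER_PARAMETER, it should be tunable at startup; a hedged example invocation, not part of this commit:

    mongod --setParameter waitForSecondaryBeforeNoopWriteMS=50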
 src/mongo/db/read_concern.cpp                      | 16
 src/mongo/db/repl/replication_coordinator.h        | 10
 src/mongo/db/repl/replication_coordinator_impl.cpp | 32
 src/mongo/db/repl/replication_coordinator_impl.h   | 17
 src/mongo/db/repl/replication_coordinator_mock.cpp |  5
 src/mongo/db/repl/replication_coordinator_mock.h   |  3
 6 files changed, 73 insertions(+), 10 deletions(-)
diff --git a/src/mongo/db/read_concern.cpp b/src/mongo/db/read_concern.cpp
index 557c83c5f6a..4a11f36908f 100644
--- a/src/mongo/db/read_concern.cpp
+++ b/src/mongo/db/read_concern.cpp
@@ -102,6 +102,8 @@ private:
};
+MONGO_EXPORT_SERVER_PARAMETER(waitForSecondaryBeforeNoopWriteMS, int, 10);
+
/**
* Schedule a write via appendOplogNote command to the primary of this replica set.
*/
@@ -112,6 +114,20 @@ Status makeNoopWriteIfNeeded(OperationContext* opCtx, LogicalTime clusterTime) {
auto& writeRequests = getWriteRequestsSynchronizer(opCtx->getClient()->getServiceContext());
auto lastAppliedOpTime = LogicalTime(replCoord->getMyLastAppliedOpTime().getTimestamp());
+
+ // Secondaries may lag behind the primary, so wait first to avoid unnecessary noop writes.
+ if (clusterTime > lastAppliedOpTime && replCoord->getMemberState().secondary()) {
+ auto deadline = Date_t::now() + Milliseconds(waitForSecondaryBeforeNoopWriteMS.load());
+ auto readConcernArgs =
+ repl::ReadConcernArgs(clusterTime, repl::ReadConcernLevel::kLocalReadConcern);
+ auto waitStatus = replCoord->waitUntilOpTimeForReadUntil(opCtx, readConcernArgs, deadline);
+ lastAppliedOpTime = LogicalTime(replCoord->getMyLastAppliedOpTime().getTimestamp());
+ if (!waitStatus.isOK()) {
+ LOG(1) << "Wait for clusterTime: " << clusterTime.toString()
+ << " until deadline: " << deadline << " failed with " << waitStatus.toString();
+ }
+ }
+
auto status = Status::OK();
int remainingAttempts = 3;
// this loop addresses the case when two or more threads need to advance the opLog time but the
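
Distilled, the hunk above waits briefly for replication to catch up and only schedules the noop write if the node is still behind at the deadline. A minimal standalone sketch of that pattern, using std::condition_variable in place of the server's ReplicationCoordinator and LogicalTime machinery (ClusterTime, lastApplied, and scheduleNoopWrite are hypothetical stand-ins, not server APIs):

    #include <chrono>
    #include <condition_variable>
    #include <cstdint>
    #include <iostream>
    #include <mutex>
    #include <thread>

    using ClusterTime = std::uint64_t;

    std::mutex mtx;
    std::condition_variable appliedCV;
    ClusterTime lastApplied = 0;  // advanced by the simulated replication thread

    void scheduleNoopWrite(ClusterTime target) {
        std::cout << "noop write needed to reach clusterTime " << target << "\n";
    }

    // Wait up to waitMS for replication to reach 'target'; only fall back to a
    // noop write if the node is still behind once the wait expires.
    void makeNoopWriteIfNeeded(ClusterTime target, std::chrono::milliseconds waitMS) {
        std::unique_lock<std::mutex> lock(mtx);
        appliedCV.wait_for(lock, waitMS, [&] { return lastApplied >= target; });
        if (lastApplied < target) {
            scheduleNoopWrite(target);  // the wait timed out; advance the oplog
        }
    }

    int main() {
        std::thread replicator([] {
            std::this_thread::sleep_for(std::chrono::milliseconds(5));
            { std::lock_guard<std::mutex> lg(mtx); lastApplied = 100; }
            appliedCV.notify_all();
        });
        makeNoopWriteIfNeeded(100, std::chrono::milliseconds(50));  // catches up, no noop
        replicator.join();
        makeNoopWriteIfNeeded(200, std::chrono::milliseconds(10));  // times out, noop needed
        return 0;
    }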
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index e02a4efffae..403792a0f99 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -376,6 +376,16 @@ public:
const ReadConcernArgs& settings) = 0;
/**
+ * Waits until the deadline or until the optime of the current node is at least the opTime
+ * specified in 'settings'.
+ *
+ * Returns whether the wait was successful.
+ */
+ virtual Status waitUntilOpTimeForReadUntil(OperationContext* opCtx,
+ const ReadConcernArgs& settings,
+ boost::optional<Date_t> deadline) = 0;
+
+ /**
* Retrieves and returns the current election id, which is a unique id that is local to
* this node and changes every time we become primary.
* TODO(spencer): Use term instead.
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 27a10f50e1b..f50edc09f3c 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1176,6 +1176,12 @@ Status ReplicationCoordinatorImpl::waitUntilOpTimeForRead(OperationContext* opCt
return Status::OK();
}
+ return waitUntilOpTimeForReadUntil(opCtx, readConcern, boost::none);
+}
+
+Status ReplicationCoordinatorImpl::waitUntilOpTimeForReadUntil(OperationContext* opCtx,
+ const ReadConcernArgs& readConcern,
+ boost::optional<Date_t> deadline) {
if (getReplicationMode() != repl::ReplicationCoordinator::modeReplSet) {
// For master/slave and standalone nodes, readAfterOpTime is not supported, so we return an
// error. However, we consider all writes "committed" and can treat MajorityReadConcern as
@@ -1185,7 +1191,7 @@ Status ReplicationCoordinatorImpl::waitUntilOpTimeForRead(OperationContext* opCt
}
if (readConcern.getArgsClusterTime()) {
- return _waitUntilClusterTimeForRead(opCtx, readConcern);
+ return _waitUntilClusterTimeForRead(opCtx, readConcern, deadline);
} else {
return _waitUntilOpTimeForReadDeprecated(opCtx, readConcern);
}
@@ -1193,7 +1199,8 @@ Status ReplicationCoordinatorImpl::waitUntilOpTimeForRead(OperationContext* opCt
Status ReplicationCoordinatorImpl::_waitUntilOpTime(OperationContext* opCtx,
bool isMajorityReadConcern,
- OpTime targetOpTime) {
+ OpTime targetOpTime,
+ boost::optional<Date_t> deadline) {
if (!isMajorityReadConcern) {
// This assumes the read concern is "local" level.
// We need to wait for all committed writes to be visible, even in the oplog (which uses
@@ -1245,7 +1252,19 @@ Status ReplicationCoordinatorImpl::_waitUntilOpTime(OperationContext* opCtx,
LOG(3) << "waitUntilOpTime: OpID " << opCtx->getOpID() << " is waiting for OpTime "
<< waiter << " until " << opCtx->getDeadline();
- auto waitStatus = opCtx->waitForConditionOrInterruptNoAssert(condVar, lock);
+ auto waitStatus = Status::OK();
+ if (deadline) {
+ auto waitUntilStatus =
+ opCtx->waitForConditionOrInterruptNoAssertUntil(condVar, lock, *deadline);
+ if (!waitUntilStatus.isOK()) {
+ waitStatus = waitUntilStatus.getStatus();
+ }
+ // If a deadline is set, there is no need to keep waiting until targetOpTime is reached.
+ return waitStatus;
+ } else {
+ waitStatus = opCtx->waitForConditionOrInterruptNoAssert(condVar, lock);
+ }
+
if (!waitStatus.isOK()) {
return waitStatus;
}
@@ -1254,8 +1273,9 @@ Status ReplicationCoordinatorImpl::_waitUntilOpTime(OperationContext* opCtx,
return Status::OK();
}
-Status ReplicationCoordinatorImpl::_waitUntilClusterTimeForRead(
- OperationContext* opCtx, const ReadConcernArgs& readConcern) {
+Status ReplicationCoordinatorImpl::_waitUntilClusterTimeForRead(OperationContext* opCtx,
+ const ReadConcernArgs& readConcern,
+ boost::optional<Date_t> deadline) {
auto clusterTime = *readConcern.getArgsClusterTime();
invariant(clusterTime != LogicalTime::kUninitialized);
@@ -1267,7 +1287,7 @@ Status ReplicationCoordinatorImpl::_waitUntilClusterTimeForRead(
const bool isMajorityReadConcern =
readConcern.getLevel() == ReadConcernLevel::kMajorityReadConcern;
- return _waitUntilOpTime(opCtx, isMajorityReadConcern, targetOpTime);
+ return _waitUntilOpTime(opCtx, isMajorityReadConcern, targetOpTime, deadline);
}
// TODO: remove when SERVER-29729 is done
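
The deadline handling added to _waitUntilOpTime above reduces to choosing between a bounded and an unbounded condition-variable wait. A standalone sketch of that branch, using std::condition_variable rather than the server's OperationContext wait helpers (waitMaybeUntil is a hypothetical name):

    #include <chrono>
    #include <condition_variable>
    #include <mutex>
    #include <optional>

    using Deadline = std::optional<std::chrono::steady_clock::time_point>;

    // Returns true if 'ready' became true. With a deadline, the wait may return
    // early with the condition still false (the caller re-checks progress),
    // mirroring the early return in _waitUntilOpTime when 'deadline' is set.
    bool waitMaybeUntil(std::mutex& mtx,
                        std::condition_variable& cv,
                        const bool& ready,
                        Deadline deadline) {
        std::unique_lock<std::mutex> lock(mtx);
        if (deadline) {
            return cv.wait_until(lock, *deadline, [&] { return ready; });
        }
        cv.wait(lock, [&] { return ready; });
        return true;
    }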
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 69b57830d69..ff6053ce5a7 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -167,6 +167,10 @@ public:
virtual OpTime getMyLastAppliedOpTime() const override;
virtual OpTime getMyLastDurableOpTime() const override;
+ virtual Status waitUntilOpTimeForReadUntil(OperationContext* opCtx,
+ const ReadConcernArgs& readConcern,
+ boost::optional<Date_t> deadline) override;
+
virtual Status waitUntilOpTimeForRead(OperationContext* opCtx,
const ReadConcernArgs& readConcern) override;
@@ -1111,7 +1115,10 @@ private:
/**
* Waits until the optime of the current node is at least the 'opTime'.
*/
- Status _waitUntilOpTime(OperationContext* opCtx, bool isMajorityReadConcern, OpTime opTime);
+ Status _waitUntilOpTime(OperationContext* opCtx,
+ bool isMajorityReadConcern,
+ OpTime opTime,
+ boost::optional<Date_t> deadline = boost::none);
/**
* Waits until the optime of the current node is at least the opTime specified in 'readConcern'.
@@ -1122,11 +1129,13 @@ private:
const ReadConcernArgs& readConcern);
/**
- * Waits until the optime of the current node is at least the clusterTime specified in
- * 'readConcern'. Supports local and majority readConcern.
+ * Waits until the deadline or until the optime of the current node is at least the clusterTime
+ * specified in 'readConcern'. Supports local and majority readConcern.
+ * If maxTimeMS and deadline are both specified, it waits for min(maxTimeMS, deadline).
*/
Status _waitUntilClusterTimeForRead(OperationContext* opCtx,
- const ReadConcernArgs& readConcern);
+ const ReadConcernArgs& readConcern,
+ boost::optional<Date_t> deadline);
/**
* Returns a pseudorandom number no less than 0 and less than limit (which must be positive).
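
The min(maxTimeMS, deadline) behaviour documented above comes from combining the operation-wide deadline (derived from maxTimeMS) with the explicit per-wait deadline; in the server this combination is handled by waitForConditionOrInterruptNoAssertUntil, which also respects the operation's own deadline. A hypothetical helper illustrating the semantics, not the server's Date_t/OperationContext code:

    #include <algorithm>
    #include <chrono>
    #include <optional>

    using TimePoint = std::chrono::steady_clock::time_point;

    // Effective wake-up time: the earlier of the operation-wide deadline and
    // the explicit per-wait deadline, if one was supplied.
    TimePoint effectiveDeadline(TimePoint opDeadline,
                                std::optional<TimePoint> waitDeadline) {
        return waitDeadline ? std::min(opDeadline, *waitDeadline) : opDeadline;
    }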
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index 1db09263d55..58a3efd664d 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -229,6 +229,11 @@ Status ReplicationCoordinatorMock::waitUntilOpTimeForRead(OperationContext* opCt
return Status::OK();
}
+Status ReplicationCoordinatorMock::waitUntilOpTimeForReadUntil(OperationContext* opCtx,
+ const ReadConcernArgs& settings,
+ boost::optional<Date_t> deadline) {
+ return Status::OK();
+}
OID ReplicationCoordinatorMock::getElectionId() {
// TODO
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 4472247de44..12bf54c1f6c 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -132,6 +132,9 @@ public:
virtual Status waitUntilOpTimeForRead(OperationContext* opCtx,
const ReadConcernArgs& settings) override;
+ virtual Status waitUntilOpTimeForReadUntil(OperationContext* opCtx,
+ const ReadConcernArgs& settings,
+ boost::optional<Date_t> deadline) override;
virtual OID getElectionId();
virtual OID getMyRID() const;