summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVesselina Ratcheva <vesselina.ratcheva@10gen.com>2021-06-10 21:12:14 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-07-26 03:11:17 +0000
commit6bdae3b2899c0123e37172410ebe021e90cd2148 (patch)
tree8bbbe4fe4c1f635842972dbd001a9c88cfa4be88
parentef7f9c17240ea65df5cd148a529901bab47f05ed (diff)
downloadmongo-6bdae3b2899c0123e37172410ebe021e90cd2148.tar.gz
SERVER-56620 Clear any durableOpTimeAndWallTime received from arbiters
(cherry picked from commit a85e78c4fe3734abf38b95348e0aa8d62ebdfdc5)
-rw-r--r--src/mongo/db/repl/repl_set_heartbeat_response.h5
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp19
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_test.cpp158
-rw-r--r--src/mongo/db/repl/topology_coordinator.cpp24
4 files changed, 203 insertions, 3 deletions
diff --git a/src/mongo/db/repl/repl_set_heartbeat_response.h b/src/mongo/db/repl/repl_set_heartbeat_response.h
index 39e2a90c352..e62c2f3d4e2 100644
--- a/src/mongo/db/repl/repl_set_heartbeat_response.h
+++ b/src/mongo/db/repl/repl_set_heartbeat_response.h
@@ -186,6 +186,11 @@ public:
_durableOpTime = time.opTime;
_durableWallTime = time.wallTime;
}
+ void unsetDurableOpTimeAndWallTime() {
+ _durableOpTimeSet = false;
+ _durableOpTime = OpTime();
+ _durableWallTime = Date_t();
+ }
void setTerm(long long term) {
_term = term;
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index c36478959e9..451aaf94c95 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -242,6 +242,25 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
// after this function, so we cannot and don't need to wait for it to finish.
_processReplSetMetadata_inlock(replMetadata.getValue());
}
+
+ // Arbiters are always expected to report null durable optimes (and wall times).
+ // If that is not the case here, make sure to correct these times before ingesting them.
+ auto memberInConfig = _rsConfig.findMemberByHostAndPort(target);
+ if ((hbResponse.hasState() && hbResponse.getState().arbiter()) ||
+ (_rsConfig.isInitialized() && memberInConfig && memberInConfig->isArbiter())) {
+ if (hbResponse.hasDurableOpTime() &&
+ (!hbResponse.getDurableOpTime().isNull() ||
+ hbResponse.getDurableOpTimeAndWallTime().wallTime != Date_t())) {
+ LOGV2_FOR_HEARTBEATS(
+ 5662000,
+ 1,
+ "Received non-null durable optime/walltime for arbiter from heartbeat. "
+ "Ignoring value(s).",
+ "target"_attr = target,
+ "durableOpTimeAndWallTime"_attr = hbResponse.getDurableOpTimeAndWallTime());
+ hbResponse.unsetDurableOpTimeAndWallTime();
+ }
+ }
}
const Date_t now = _replExecutor->now();
Microseconds networkTime(0);
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 6dfaebf5287..4fd00274f8f 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -44,6 +44,8 @@
#include "mongo/db/concurrency/replication_state_transition_lock_guard.h"
#include "mongo/db/operation_context_noop.h"
#include "mongo/db/repl/bson_extract_optime.h"
+#include "mongo/db/repl/data_replicator_external_state_impl.h"
+#include "mongo/db/repl/heartbeat_response_action.h"
#include "mongo/db/repl/is_master_response.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/repl/read_concern_args.h"
@@ -7803,6 +7805,162 @@ TEST_F(ReplCoordTest, NodeFailsVoteRequestIfCandidateIndexIsInvalid) {
}
}
+TEST_F(ReplCoordTest, IgnoreNonNullDurableOpTimeOrWallTimeForArbiterFromReplSetUpdatePosition) {
+ init("mySet/node1:12345,node2:12345,node3:12345");
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345"
+ << "arbiterOnly" << true))),
+ HostAndPort("node1", 12345));
+ ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ const auto repl = getReplCoord();
+
+ OpTimeWithTermOne opTime1(100, 1);
+ OpTimeWithTermOne opTime2(200, 2);
+ Date_t wallTime1 = Date_t() + Seconds(10);
+ Date_t wallTime2 = Date_t() + Seconds(20);
+
+ // Node 1 is ahead, nodes 2 and 3 a bit behind.
+ // Node 3 should not have a durable optime/walltime as they are an arbiter.
+ repl->setMyLastAppliedOpTimeAndWallTime({opTime2, wallTime2});
+ repl->setMyLastDurableOpTimeAndWallTime({opTime2, wallTime2});
+ ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 2, opTime1, wallTime1));
+ ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 3, opTime1, wallTime1));
+ ASSERT_OK(repl->setLastDurableOptime_forTest(1, 2, opTime1, wallTime1));
+ ASSERT_OK(repl->setLastDurableOptime_forTest(1, 3, OpTime(), Date_t()));
+
+ simulateSuccessfulV1Election();
+ ASSERT_TRUE(repl->getMemberState().primary());
+
+ // Receive updates on behalf of nodes 2 and 3 from node 2.
+ // Node 3 will be reported as caught up both in lastApplied and lastDurable, but we
+ // must ignore the lastDurable part as null is the only valid value for arbiters.
+ long long configVersion = repl->getConfig().getConfigVersion();
+ UpdatePositionArgs updatePositionArgs;
+
+ ASSERT_OK(updatePositionArgs.initialize(BSON(
+ UpdatePositionArgs::kCommandFieldName
+ << 1 << UpdatePositionArgs::kUpdateArrayFieldName
+ << BSON_ARRAY(
+ BSON(UpdatePositionArgs::kConfigVersionFieldName
+ << repl->getConfig().getConfigVersion()
+ << UpdatePositionArgs::kMemberIdFieldName << 2
+ << UpdatePositionArgs::kAppliedOpTimeFieldName << opTime2.asOpTime().toBSON()
+ << UpdatePositionArgs::kAppliedWallTimeFieldName << wallTime2
+ << UpdatePositionArgs::kDurableOpTimeFieldName << opTime2.asOpTime().toBSON()
+ << UpdatePositionArgs::kDurableWallTimeFieldName << wallTime2)
+ << BSON(UpdatePositionArgs::kConfigVersionFieldName
+ << repl->getConfig().getConfigVersion()
+ << UpdatePositionArgs::kMemberIdFieldName << 3
+ << UpdatePositionArgs::kAppliedOpTimeFieldName << opTime2.asOpTime().toBSON()
+ << UpdatePositionArgs::kAppliedWallTimeFieldName << wallTime2
+ << UpdatePositionArgs::kDurableOpTimeFieldName << opTime2.asOpTime().toBSON()
+ << UpdatePositionArgs::kDurableWallTimeFieldName << wallTime2)))));
+
+ startCapturingLogMessages();
+ ASSERT_OK(repl->processReplSetUpdatePosition(updatePositionArgs, &configVersion));
+
+ // Make sure node 2 is fully caught up but node 3 has null durable optime/walltime.
+ auto memberDataVector = repl->getMemberData();
+ for (auto member : memberDataVector) {
+ auto memberId = member.getMemberId();
+ if (memberId == MemberId(1) || memberId == MemberId(2)) {
+ ASSERT_EQ(member.getLastAppliedOpTime(), opTime2.asOpTime());
+ ASSERT_EQ(member.getLastAppliedWallTime(), wallTime2);
+ ASSERT_EQ(member.getLastDurableOpTime(), opTime2.asOpTime());
+ ASSERT_EQ(member.getLastDurableWallTime(), wallTime2);
+ continue;
+ } else if (member.getMemberId() == MemberId(3)) {
+ ASSERT_EQ(member.getLastAppliedOpTime(), opTime2.asOpTime());
+ ASSERT_EQ(member.getLastAppliedWallTime(), wallTime2);
+ ASSERT_EQ(member.getLastDurableOpTime(), OpTime());
+ ASSERT_EQ(member.getLastDurableWallTime(), Date_t());
+ continue;
+ }
+ MONGO_UNREACHABLE;
+ }
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(
+ 1,
+ countTextFormatLogLinesContaining(
+ "Received non-null durable optime/walltime for arbiter from replSetUpdatePosition"));
+}
+
+TEST_F(ReplCoordTest, IgnoreNonNullDurableOpTimeOrWallTimeForArbiterFromHeartbeat) {
+ unittest::MinimumLoggedSeverityGuard severityGuard{logv2::LogComponent::kReplication,
+ logv2::LogSeverity::Debug(1)};
+ init("mySet/node1:12345,node2:12345");
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"
+ << "arbiterOnly" << true))),
+ HostAndPort("node1", 12345));
+ ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ const auto repl = getReplCoord();
+
+ OpTimeWithTermOne opTime1(100, 1);
+ OpTimeWithTermOne opTime2(200, 2);
+ Date_t wallTime1 = Date_t() + Seconds(10);
+ Date_t wallTime2 = Date_t() + Seconds(20);
+
+ // Node 1 is ahead, nodes 2 is a bit behind.
+ // Node 2 should not have a durable optime/walltime as they are an arbiter.
+ repl->setMyLastAppliedOpTimeAndWallTime({opTime2, wallTime2});
+ repl->setMyLastDurableOpTimeAndWallTime({opTime2, wallTime2});
+ ASSERT_OK(repl->setLastAppliedOptime_forTest(1, 2, opTime1, wallTime1));
+ ASSERT_OK(repl->setLastDurableOptime_forTest(1, 2, OpTime(), Date_t()));
+
+ simulateSuccessfulV1Election();
+ ASSERT_TRUE(repl->getMemberState().primary());
+
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName("mySet");
+ hbResp.setTerm(1);
+ hbResp.setConfigVersion(2);
+ hbResp.setConfigTerm(1);
+ hbResp.setState(MemberState::RS_ARBITER);
+ hbResp.setAppliedOpTimeAndWallTime({opTime2, wallTime2});
+ hbResp.setDurableOpTimeAndWallTime({opTime2, wallTime2});
+
+ startCapturingLogMessages();
+ repl->handleHeartbeatResponse_forTest(
+ hbResp.toBSON(), 1 /* targetIndex */, Milliseconds(5) /* ping */);
+
+ auto memberDataVector = repl->getMemberData();
+ for (auto member : memberDataVector) {
+ auto memberId = member.getMemberId();
+ if (memberId == MemberId(1)) {
+ ASSERT_EQ(member.getLastAppliedOpTime(), opTime2.asOpTime());
+ ASSERT_EQ(member.getLastAppliedWallTime(), wallTime2);
+ ASSERT_EQ(member.getLastDurableOpTime(), opTime2.asOpTime());
+ ASSERT_EQ(member.getLastDurableWallTime(), wallTime2);
+ continue;
+ } else if (member.getMemberId() == MemberId(2)) {
+ ASSERT_EQ(member.getLastAppliedOpTime(), opTime2.asOpTime());
+ ASSERT_EQ(member.getLastAppliedWallTime(), wallTime2);
+ ASSERT_EQ(member.getLastDurableOpTime(), OpTime());
+ ASSERT_EQ(member.getLastDurableWallTime(), Date_t());
+ continue;
+ }
+ MONGO_UNREACHABLE;
+ }
+
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1,
+ countTextFormatLogLinesContaining(
+ "Received non-null durable optime/walltime for arbiter from heartbeat"));
+}
+
// TODO(schwerin): Unit test election id updating
} // namespace
} // namespace repl
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index 6dd1b2d6117..d46bf2b3c9a 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -1325,6 +1325,24 @@ StatusWith<bool> TopologyCoordinator::setLastOptime(const UpdatePositionArgs::Up
invariant(memberId == memberData->getMemberId());
+ auto durableOpTime = args.durableOpTime;
+ auto durableWallTime = args.durableWallTime;
+
+ // Arbiters are always expected to report null durable optimes (and wall times).
+ // If that is not the case here, make sure to correct these times before ingesting them.
+ auto& memberInConfig = _rsConfig.getMemberAt(memberData->getConfigIndex());
+ if ((memberData->getState().arbiter() || memberInConfig.isArbiter()) &&
+ (!args.durableOpTime.isNull() || args.durableWallTime != Date_t())) {
+ LOGV2(5662001,
+ "Received non-null durable optime/walltime for arbiter from "
+ "replSetUpdatePosition. Ignoring value(s).",
+ "memberId"_attr = memberId,
+ "durableOpTime"_attr = args.durableOpTime,
+ "durableWallTime"_attr = args.durableWallTime);
+ durableOpTime = OpTime();
+ durableWallTime = Date_t();
+ }
+
LOGV2_DEBUG(21815,
3,
"Node with memberID {memberId} currently has optime {oldLastAppliedOpTime} "
@@ -1335,12 +1353,12 @@ StatusWith<bool> TopologyCoordinator::setLastOptime(const UpdatePositionArgs::Up
"oldLastAppliedOpTime"_attr = memberData->getLastAppliedOpTime(),
"oldLastDurableOpTime"_attr = memberData->getLastDurableOpTime(),
"newAppliedOpTime"_attr = args.appliedOpTime,
- "newDurableOpTime"_attr = args.durableOpTime);
+ "newDurableOpTime"_attr = durableOpTime);
bool advancedOpTime = memberData->advanceLastAppliedOpTimeAndWallTime(
{args.appliedOpTime, args.appliedWallTime}, now);
- advancedOpTime = memberData->advanceLastDurableOpTimeAndWallTime(
- {args.durableOpTime, args.durableWallTime}, now) ||
+ advancedOpTime =
+ memberData->advanceLastDurableOpTimeAndWallTime({durableOpTime, durableWallTime}, now) ||
advancedOpTime;
return advancedOpTime;
}