summary | refs | log | tree | commit | diff
path: root/src/mongo/db/repl
diff options
context:
space:
mode:
author: Lingzhi Deng <lingzhi.deng@mongodb.com> 2021-01-20 09:36:26 -0500
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-02-17 03:41:21 +0000
commit: 4fb715053b3ad308c85501e9e9d0a1169bc78556 (patch)
tree: 7c0ef47221fc3d68ae6fd41e0313a37b968f4e9c /src/mongo/db/repl
parent: ecbb91d31416fc9a68d896ea255f5494ca2a54d4 (diff)
download: mongo-4fb715053b3ad308c85501e9e9d0a1169bc78556.tar.gz
SERVER-53612: Fix StepDown hangs when all nodes are caught up but none is immediately electable
(cherry picked from commit 6308db5c83a3e95f4532c63df8b635b8090036ae)
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r-- src/mongo/db/repl/heartbeat_response_action.cpp | 4
-rw-r--r-- src/mongo/db/repl/heartbeat_response_action.h | 15
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp | 10
-rw-r--r-- src/mongo/db/repl/topology_coordinator.cpp | 4
4 files changed, 30 insertions, 3 deletions
diff --git a/src/mongo/db/repl/heartbeat_response_action.cpp b/src/mongo/db/repl/heartbeat_response_action.cpp
index c21dbd0cf53..6b6bd4797a8 100644
--- a/src/mongo/db/repl/heartbeat_response_action.cpp
+++ b/src/mongo/db/repl/heartbeat_response_action.cpp
@@ -79,5 +79,9 @@ void HeartbeatResponseAction::setAdvancedOpTime(bool advanced) {
_advancedOpTime = advanced;
}
+void HeartbeatResponseAction::setBecameElectable(bool becameElectable) {
+ _becameElectable = becameElectable;  // Value later returned by getBecameElectable().
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/heartbeat_response_action.h b/src/mongo/db/repl/heartbeat_response_action.h
index 9f0b3e4da8c..194b4bb0845 100644
--- a/src/mongo/db/repl/heartbeat_response_action.h
+++ b/src/mongo/db/repl/heartbeat_response_action.h
@@ -105,6 +105,12 @@ public:
*/
void setAdvancedOpTime(bool advanced);
+ /**
+ * Sets whether or not the member has transitioned from unelectable to electable since the last
+ * heartbeat response. The stored flag is false until this is called with true.
+ */
+ void setBecameElectable(bool becameElectable);
+
/**
* Gets the action type of this action.
*/
@@ -136,11 +142,20 @@ public:
return _advancedOpTime;
}
+ /**
+ * Returns true if the heartbeat response results in the member transitioning from unelectable
+ * to electable. Returns false if setBecameElectable() was never called on this action.
+ */
+ bool getBecameElectable() const {
+ return _becameElectable;
+ }
+
private:
Action _action;
int _primaryIndex;
Date_t _nextHeartbeatStartDate;
bool _advancedOpTime = false;
+ bool _becameElectable = false;
};
} // namespace repl
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index c993d696a0d..0b32eacc507 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -237,9 +237,13 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
if (action.getAction() == HeartbeatResponseAction::NoAction && hbStatusResponse.isOK() &&
hbStatusResponse.getValue().hasState() &&
- hbStatusResponse.getValue().getState() != MemberState::RS_PRIMARY &&
- action.getAdvancedOpTime()) {
- _updateLastCommittedOpTimeAndWallTime(lk);
+ hbStatusResponse.getValue().getState() != MemberState::RS_PRIMARY) {
+ if (action.getAdvancedOpTime()) {
+ _updateLastCommittedOpTimeAndWallTime(lk);
+ } else if (action.getBecameElectable() && _topCoord->isSteppingDown()) {
+ // Try to wake up the stepDown waiter when a new node becomes electable.
+ _wakeReadyWaiters(lk);
+ }
}
// Abort catchup if we have caught up to the latest known optime after heartbeat refreshing.
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index 1df593d8330..727270bee7d 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -854,6 +854,7 @@ HeartbeatResponseAction TopologyCoordinator::processHeartbeatResponse(
MemberData& hbData = _memberData.at(memberIndex);
const MemberConfig member = _rsConfig.getMemberAt(memberIndex);
bool advancedOpTime = false;
+ bool becameElectable = false;
if (!hbResponse.isOK()) {
if (isUnauthorized) {
hbData.setAuthIssue(now);
@@ -871,7 +872,9 @@ HeartbeatResponseAction TopologyCoordinator::processHeartbeatResponse(
ReplSetHeartbeatResponse hbr = std::move(hbResponse.getValue());
LOG(3) << "setUpValues: heartbeat response good for member _id:" << member.getId();
pingsInConfig++;
+ auto wasUnelectable = hbData.isUnelectable();
advancedOpTime = hbData.setUpValues(now, std::move(hbr));
+ becameElectable = wasUnelectable && !hbData.isUnelectable();
}
HeartbeatResponseAction nextAction;
@@ -879,6 +882,7 @@ HeartbeatResponseAction TopologyCoordinator::processHeartbeatResponse(
nextAction.setNextHeartbeatStartDate(nextHeartbeatStartDate);
nextAction.setAdvancedOpTime(advancedOpTime);
+ nextAction.setBecameElectable(becameElectable);
return nextAction;
}