summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author matt dannenberg <matt.dannenberg@10gen.com> 2015-11-24 06:33:08 -0500
committer matt dannenberg <matt.dannenberg@10gen.com> 2015-11-24 09:21:24 -0500
commit e8f4a2ea35060e97281221f3b1457ab7106e631e (patch)
tree 4abc6c4dc5452333d24424712f943c56b400b0b4
parent cdd8c86aaae0da573bbea2e3c86267d42e8d6b56 (diff)
download mongo-e8f4a2ea35060e97281221f3b1457ab7106e631e.tar.gz
SERVER-21249 only restart heartbeats once when a node cannot find a syncsource
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 21
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.h 9
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp 19
3 files changed, 29 insertions, 20 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 2e5f6feeb25..877910c792f 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1460,8 +1460,7 @@ void ReplicationCoordinatorImpl::_stepDownContinue(
return;
}
stdx::lock_guard<stdx::mutex> lk(_mutex);
- _cancelHeartbeats_inlock();
- _startHeartbeats_inlock(cbData);
+ _restartHeartbeats_inlock(cbData);
}
void ReplicationCoordinatorImpl::_handleTimePassing(
@@ -2643,18 +2642,17 @@ void ReplicationCoordinatorImpl::_chooseNewSyncSource(
if (cbData.status == ErrorCodes::CallbackCanceled) {
return;
}
+
+ HostAndPort oldSyncSource = _topCoord->getSyncSourceAddress();
+
*newSyncSource = _topCoord->chooseNewSyncSource(_replExecutor.now(), lastTimestampFetched);
stdx::lock_guard<stdx::mutex> lock(_mutex);
- // If no sync source is found, schedule new heartbeats immediately to update our member state,
- // allowing us to make informed sync source decisions.
- if (newSyncSource->empty() && _justLostSyncSource && _selfIndex >= 0 &&
+ // If we lost our sync source, schedule new heartbeats immediately to update our knowledge
+ // of other members' state, allowing us to make informed sync source decisions.
+ if (newSyncSource->empty() && !oldSyncSource.empty() && _selfIndex >= 0 &&
!_getMemberState_inlock().primary()) {
- _cancelHeartbeats_inlock();
- _startHeartbeats_inlock(cbData);
- _justLostSyncSource = false;
- } else {
- _justLostSyncSource = true;
+ _restartHeartbeats_inlock(cbData);
}
}
@@ -3358,8 +3356,7 @@ void ReplicationCoordinatorImpl::_scheduleElectionWinNotification() {
return;
}
- _cancelHeartbeats_inlock();
- _startHeartbeats_inlock(cbData);
+ _restartHeartbeats_inlock(cbData);
};
auto cbStatus = _replExecutor.scheduleWork(electionWinNotificationCallback);
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index d1cca5c96e3..a772d35e8ce 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -778,6 +778,12 @@ private:
void _cancelHeartbeats_inlock();
/**
+ * Cancels all heartbeats, then starts a heartbeat for each member in the current config.
+ * Called within the executor context.
+ */
+ void _restartHeartbeats_inlock(const ReplicationExecutor::CallbackArgs& cbData);
+
+ /**
* Asynchronously sends a heartbeat to "target". "targetIndex" is the index
* into the replica set config members array that corresponds to the "target", or -1 if
* we don't have a valid replica set config.
@@ -1380,9 +1386,6 @@ private:
// Cached copy of the current config protocol version.
AtomicInt64 _protVersion; // (S)
-
- // Prevents a busy loop of cancelling heartbeats when we have no sync source.
- bool _justLostSyncSource = true; // (M)
};
} // namespace repl
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index ba850f86eab..cfc8116e1b6 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -526,6 +526,12 @@ void ReplicationCoordinatorImpl::_cancelHeartbeats_inlock() {
}
}
+void ReplicationCoordinatorImpl::_restartHeartbeats_inlock(
+ const ReplicationExecutor::CallbackArgs& cbData) {
+ _cancelHeartbeats_inlock();
+ _startHeartbeats_inlock(cbData);
+}
+
void ReplicationCoordinatorImpl::_startHeartbeats_inlock(
const ReplicationExecutor::CallbackArgs& cbData) {
const Date_t now = _replExecutor.now();
@@ -575,8 +581,8 @@ void ReplicationCoordinatorImpl::_handleLivenessTimeout(
slaveInfo.down = true;
if (_memberState.primary()) {
- // Only adjust hbdata if we are primary, since only the primary has a full view of
- // the entire cluster.
+ // Only adjust hbdata if we are primary, since only the primary has a full view
+ // of the entire cluster.
// Secondaries might not see other secondaries in the cluster if they are not
// downstream.
HeartbeatResponseAction action =
@@ -604,7 +610,8 @@ void ReplicationCoordinatorImpl::_scheduleNextLivenessUpdate_inlock(
if (!isV1ElectionProtocol()) {
return;
}
- // Scan liveness table for earliest date; schedule a run at (that date plus election timeout).
+ // Scan liveness table for earliest date; schedule a run at (that date plus election
+ // timeout).
Date_t earliestDate = Date_t::max();
int earliestMemberId = -1;
for (auto&& slaveInfo : _slaveInfo) {
@@ -710,7 +717,8 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(bool isPriorityTake
return;
}
- // We should always reschedule this callback even if we do not make it to the election process.
+ // We should always reschedule this callback even if we do not make it to the election
+ // process.
{
stdx::lock_guard<stdx::mutex> lock(_mutex);
_cancelPriorityTakeover_inlock();
@@ -719,7 +727,8 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(bool isPriorityTake
if (!_topCoord->becomeCandidateIfElectable(_replExecutor.now(), getMyLastOptime())) {
if (isPriorityTakeOver) {
- log() << "Not starting an election for a priority takeover, since we are not electable";
+ log() << "Not starting an election for a priority takeover, since we are not "
+ "electable";
} else {
log() << "Not starting an election, since we are not electable";
}