From 2070d8ca09576b858c61677bc366a25e7ecfecad Mon Sep 17 00:00:00 2001
From: Matthew Russotto
Date: Wed, 23 Oct 2019 14:48:52 +0000
Subject: SERVER-42385 Reduce log spam associated with election timeout resets.

---
 src/mongo/db/repl/replication_coordinator_impl.h   |  1 +
 .../replication_coordinator_impl_heartbeat.cpp     | 65 ++++++++++++-----
 .../db/repl/replication_coordinator_impl_test.cpp  | 85 ++++++++++++++++++++++
 .../repl/replication_coordinator_test_fixture.cpp  |  2 +
 4 files changed, 133 insertions(+), 20 deletions(-)

diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index b8ca6a8a262..17a74cfcd7f 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -1246,6 +1246,7 @@ private:
      * Callback which starts an election if this node is electable and using protocolVersion 1.
      */
     void _startElectSelfIfEligibleV1(StartElectionReasonEnum reason);
+    void _startElectSelfIfEligibleV1(WithLock, StartElectionReasonEnum reason);
 
     /**
      * Schedules work to be run no sooner than 'when' and returns handle to callback.
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index a337b04507b..bc733ba3459 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -825,43 +825,68 @@ void ReplicationCoordinatorImpl::_cancelCatchupTakeover_inlock() {
 }
 
 void ReplicationCoordinatorImpl::_cancelAndRescheduleElectionTimeout_inlock() {
-    if (_handleElectionTimeoutCbh.isValid()) {
-        LOG(4) << "Canceling election timeout callback at " << _handleElectionTimeoutWhen;
+    // We log at level 5 except when:
+    // * This is the first time we're scheduling after becoming an electable secondary.
+    // * We are not going to reschedule the election timeout because we are shutting down or
+    //   no longer an electable secondary.
+    // * It has been at least a second since we last logged at level 4.
+    //
+    // In those instances we log at level 4. This routine is called on every replication batch,
+    // which would produce many log lines per second, so this logging strategy provides a
+    // compromise which allows us to see the election timeout being rescheduled without spamming
+    // the logs.
+    int cancelAndRescheduleLogLevel = 5;
+    static auto logThrottleTime = _replExecutor->now();
+    const bool wasActive = _handleElectionTimeoutCbh.isValid();
+    auto now = _replExecutor->now();
+    const bool doNotReschedule = _inShutdown || !_memberState.secondary() || _selfIndex < 0 ||
+        !_rsConfig.getMemberAt(_selfIndex).isElectable();
+
+    if (doNotReschedule || !wasActive || (now - logThrottleTime) >= Seconds(1)) {
+        cancelAndRescheduleLogLevel = 4;
+        logThrottleTime = now;
+    }
+    if (wasActive) {
+        LOG_FOR_ELECTION(cancelAndRescheduleLogLevel)
+            << "Canceling election timeout callback at " << _handleElectionTimeoutWhen;
         _replExecutor->cancel(_handleElectionTimeoutCbh);
         _handleElectionTimeoutCbh = CallbackHandle();
         _handleElectionTimeoutWhen = Date_t();
     }
 
-    if (_inShutdown) {
+    if (doNotReschedule)
         return;
-    }
-
-    if (!_memberState.secondary()) {
-        return;
-    }
-
-    if (_selfIndex < 0) {
-        return;
-    }
-
-    if (!_rsConfig.getMemberAt(_selfIndex).isElectable()) {
-        return;
-    }
 
     Milliseconds randomOffset = _getRandomizedElectionOffset_inlock();
-    auto now = _replExecutor->now();
     auto when = now + _rsConfig.getElectionTimeoutPeriod() + randomOffset;
     invariant(when > now);
-    LOG_FOR_ELECTION(4) << "Scheduling election timeout callback at " << when;
+    if (wasActive) {
+        // We log at level 4 here at most once per second, otherwise at level 5.
+        LOG_FOR_ELECTION(cancelAndRescheduleLogLevel)
+            << "Rescheduling election timeout callback at " << when;
+    } else {
+        LOG_FOR_ELECTION(4) << "Scheduling election timeout callback at " << when;
+    }
     _handleElectionTimeoutWhen = when;
     _handleElectionTimeoutCbh =
-        _scheduleWorkAt(when, [=](const mongo::executor::TaskExecutor::CallbackArgs&) {
-            _startElectSelfIfEligibleV1(StartElectionReasonEnum::kElectionTimeout);
+        _scheduleWorkAt(when, [=](const mongo::executor::TaskExecutor::CallbackArgs& cbData) {
+            stdx::lock_guard lk(_mutex);
+            if (_handleElectionTimeoutCbh == cbData.myHandle) {
+                // This lets _cancelAndRescheduleElectionTimeout_inlock know the callback
+                // has happened.
+                _handleElectionTimeoutCbh = CallbackHandle();
+            }
+            _startElectSelfIfEligibleV1(lk, StartElectionReasonEnum::kElectionTimeout);
         });
 }
 
 void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(StartElectionReasonEnum reason) {
     stdx::lock_guard lock(_mutex);
+    _startElectSelfIfEligibleV1(lock, reason);
+}
+
+void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(WithLock,
+                                                             StartElectionReasonEnum reason) {
     // If it is not a single node replica set, no need to start an election after stepdown timeout.
     if (reason == StartElectionReasonEnum::kSingleNodePromptElection &&
         _rsConfig.getNumMembers() != 1) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index cfdd4b29a05..f4bb250ce1d 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -5159,6 +5159,91 @@ TEST_F(ReplCoordTest,
     ASSERT_EQUALS(electionTimeoutWhen, replCoord->getElectionTimeout_forTest());
 }
 
+TEST_F(ReplCoordTest, CancelAndRescheduleElectionTimeoutLogging) {
+    // Log all the election messages.
+    logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogComponent::kReplicationElection,
+                                                        logger::LogSeverity::Debug(5));
+    startCapturingLogMessages();
+    // heartbeatTimeoutSecs is made large so we can advance the clock without worrying about
+    // additional heartbeats.
+    assertStartSuccess(BSON("_id"
+                            << "mySet"
+                            << "settings" << BSON("heartbeatTimeoutSecs" << 60000)
+                            << "protocolVersion" << 1 << "version" << 2 << "members"
+                            << BSON_ARRAY(BSON("host"
+                                               << "node1:12345"
+                                               << "_id" << 0)
+                                          << BSON("host"
+                                                  << "node2:12345"
+                                                  << "_id" << 1))),
+                       HostAndPort("node1", 12345));
+
+    // Setting mode to secondary should schedule the election timeout.
+    ReplicationCoordinatorImpl* replCoord = getReplCoord();
+    ASSERT_OK(replCoord->setFollowerMode(MemberState::RS_SECONDARY));
+    ASSERT_EQ(1, countLogLinesContaining("Scheduling election timeout callback"));
+    ASSERT_EQ(0, countLogLinesContaining("Rescheduling election timeout callback"));
+    ASSERT_EQ(0, countLogLinesContaining("Canceling election timeout callback"));
+
+    // Scheduling again should produce the "rescheduled", not the "scheduled", message.
+    replCoord->cancelAndRescheduleElectionTimeout();
+    ASSERT_EQ(1, countLogLinesContaining("Scheduling election timeout callback"));
+    ASSERT_EQ(1, countLogLinesContaining("Rescheduling election timeout callback"));
+    ASSERT_EQ(1, countLogLinesContaining("Canceling election timeout callback"));
+
+    auto net = getNet();
+    net->enterNetwork();
+
+    // Black hole the heartbeat request scheduled after transitioning to SECONDARY.
+    ASSERT_TRUE(net->hasReadyRequests());
+    auto noi = net->getNextReadyRequest();
+    const auto& request = noi->getRequest();
+    ASSERT_EQUALS(HostAndPort("node2", 12345), request.target);
+    ASSERT_EQUALS("replSetHeartbeat", request.cmdObj.firstElement().fieldNameStringData());
+    log() << "black holing " << noi->getRequest().cmdObj;
+    net->blackHole(noi);
+
+    // Advance simulator clock to some time after the first scheduled election.
+    auto electionTimeoutWhen = replCoord->getElectionTimeout_forTest();
+    log() << "Election initially scheduled at " << electionTimeoutWhen << " (simulator time)";
+    ASSERT_GREATER_THAN(electionTimeoutWhen, net->now());
+    auto until = electionTimeoutWhen + Milliseconds(1);
+    net->runUntil(until);
+    ASSERT_EQUALS(until, net->now());
+    net->exitNetwork();
+
+    // The election should have scheduled (not rescheduled) another timeout.
+    ASSERT_EQ(2, countLogLinesContaining("Scheduling election timeout callback"));
+    ASSERT_EQ(1, countLogLinesContaining("Rescheduling election timeout callback"));
+    ASSERT_EQ(1, countLogLinesContaining("Canceling election timeout callback"));
+
+    logger::globalLogDomain()->setMinimumLoggedSeverity(logger::LogComponent::kReplicationElection,
+                                                        logger::LogSeverity::Debug(4));
+    net->enterNetwork();
+    until = electionTimeoutWhen + Milliseconds(500);
+    net->runUntil(until);
+    net->exitNetwork();
+    replCoord->cancelAndRescheduleElectionTimeout();
+
+    // We should not see this reschedule because it should be at log level 5.
+    ASSERT_EQ(2, countLogLinesContaining("Scheduling election timeout callback"));
+    ASSERT_EQ(1, countLogLinesContaining("Rescheduling election timeout callback"));
+    ASSERT_EQ(1, countLogLinesContaining("Canceling election timeout callback"));
+
+    net->enterNetwork();
+    until = electionTimeoutWhen + Milliseconds(1001);
+    net->runUntil(until);
+    net->exitNetwork();
+    replCoord->cancelAndRescheduleElectionTimeout();
+
+    stopCapturingLogMessages();
+    // We should see this reschedule at level 4 because it has been over 1 second since we
+    // last logged at level 4.
+ ASSERT_EQ(2, countLogLinesContaining("Scheduling election timeout callback")); + ASSERT_EQ(2, countLogLinesContaining("Rescheduling election timeout callback")); + ASSERT_EQ(2, countLogLinesContaining("Canceling election timeout callback")); +} + TEST_F(ReplCoordTest, AdvanceCommittedSnapshotToMostRecentSnapshotPriorToOpTimeWhenOpTimeChanges) { init("mySet"); diff --git a/src/mongo/db/repl/replication_coordinator_test_fixture.cpp b/src/mongo/db/repl/replication_coordinator_test_fixture.cpp index f3a0b72e636..b5b3cd407ba 100644 --- a/src/mongo/db/repl/replication_coordinator_test_fixture.cpp +++ b/src/mongo/db/repl/replication_coordinator_test_fixture.cpp @@ -86,6 +86,8 @@ ReplCoordTest::ReplCoordTest() { } ReplCoordTest::~ReplCoordTest() { + logger::globalLogDomain()->clearMinimumLoggedSeverity( + logger::LogComponent::kReplicationElection); globalFailPointRegistry().find("blockHeartbeatReconfigFinish")->setMode(FailPoint::off); if (_callShutdown) { -- cgit v1.2.1
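
The throttling strategy the new comment describes is a general pattern: keep the
steady-state message at a quiet level (5), and promote it to a visible level (4) on
state changes, or otherwise at most once per second. What follows is a minimal
standalone C++ sketch of that pattern for readers of this patch; logAtLevel,
ElectionTimeoutScheduler, and the fixed verbosity of 4 are hypothetical stand-ins,
not MongoDB's LOG_FOR_ELECTION machinery.

#include <chrono>
#include <iostream>
#include <string>

using Clock = std::chrono::steady_clock;

// Hypothetical stand-in for LOG_FOR_ELECTION(level): prints only when 'level'
// is at or below the configured verbosity (fixed at 4 here, matching the
// default used in the new test).
static void logAtLevel(int level, const std::string& msg) {
    const int kConfiguredVerbosity = 4;
    if (level <= kConfiguredVerbosity) {
        std::cout << "D" << level << " " << msg << "\n";
    }
}

class ElectionTimeoutScheduler {
public:
    // Mirrors the patch's strategy: default to level 5, but promote to level 4
    // when (a) there was no active timeout to cancel, (b) we will not
    // reschedule, or (c) at least a second has passed since the last level-4 line.
    void cancelAndReschedule(bool wasActive, bool willReschedule) {
        const auto now = Clock::now();
        int level = 5;
        if (!willReschedule || !wasActive ||
            now - _lastLevel4Log >= std::chrono::seconds(1)) {
            level = 4;
            _lastLevel4Log = now;
        }
        if (wasActive) {
            logAtLevel(level, "Canceling election timeout callback");
        }
        if (!willReschedule) {
            return;
        }
        logAtLevel(level, wasActive ? "Rescheduling election timeout callback"
                                    : "Scheduling election timeout callback");
    }

private:
    Clock::time_point _lastLevel4Log = Clock::now() - std::chrono::seconds(2);
};

int main() {
    ElectionTimeoutScheduler scheduler;
    // The first schedule is always visible at level 4.
    scheduler.cancelAndReschedule(/*wasActive=*/false, /*willReschedule=*/true);
    // Rapid reschedules within the same second stay at level 5 (suppressed here).
    for (int i = 0; i < 1000; ++i) {
        scheduler.cancelAndReschedule(/*wasActive=*/true, /*willReschedule=*/true);
    }
    return 0;
}

Built with any C++17 compiler, this prints the initial level-4 "Scheduling" line and
then at most one level-4 line per second, which is the visibility compromise the
patch aims for on every replication batch.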
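The patch also makes the scheduled callback clear its own handle under the mutex,
comparing the stored handle against cbData.myHandle, so the next cancel-and-reschedule
pass can tell that the timeout already fired and need not log a spurious
"Canceling ..." line. Below is a minimal sketch of that self-clearing-handle idea,
again with hypothetical stand-ins: CallbackHandle is modeled as a shared_ptr, and
'pending' stands in for the executor's queued work.

#include <functional>
#include <iostream>
#include <memory>

// Hypothetical stand-in for an executor's CallbackHandle: two handles compare
// equal only when they refer to the same scheduled callback.
using CallbackHandle = std::shared_ptr<int>;

struct TimeoutScheduler {
    CallbackHandle activeHandle;    // analogous to _handleElectionTimeoutCbh
    std::function<void()> pending;  // the scheduled work, run by a fake executor

    void schedule() {
        auto handle = std::make_shared<int>(0);
        activeHandle = handle;
        pending = [this, handle] {
            // As in the patch's lambda: if this callback is still the
            // registered one, clear the stored handle so a later
            // cancel-and-reschedule pass knows the timeout already fired.
            if (activeHandle == handle) {
                activeHandle = nullptr;
            }
            std::cout << "election timeout fired\n";
        };
    }

    void cancelAndReschedule() {
        if (activeHandle) {
            std::cout << "canceling outstanding timeout\n";
            pending = nullptr;  // "cancel" the queued work
        } else {
            std::cout << "no outstanding timeout; scheduling fresh\n";
        }
        schedule();
    }
};

int main() {
    TimeoutScheduler s;
    s.schedule();
    s.pending();              // the timeout fires and clears its own handle
    s.cancelAndReschedule();  // sees no active handle: schedules, no cancel log
    s.cancelAndReschedule();  // sees an active handle: cancels, then reschedules
    return 0;
}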