-rw-r--r--  jstests/replsets/catchup.js | 119
-rw-r--r--  jstests/replsets/catchup_takeover_two_nodes_ahead.js | 12
-rw-r--r--  jstests/replsets/libs/election_metrics.js | 45
-rw-r--r--  jstests/replsets/server_election_metrics.js | 9
-rw-r--r--  jstests/replsets/stepdown.js | 120
-rw-r--r--  jstests/replsets/unconditional_step_down.js | 14
-rw-r--r--  src/mongo/db/repl/SConscript | 1
-rw-r--r--  src/mongo/db/repl/bgsync.cpp | 12
-rw-r--r--  src/mongo/db/repl/repl_set_commands.cpp | 32
-rw-r--r--  src/mongo/db/repl/replication_coordinator.h | 18
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.cpp | 4
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp | 71
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.h | 10
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp | 7
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp | 292
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp | 8
-rw-r--r--  src/mongo/db/repl/replication_coordinator_mock.cpp | 6
-rw-r--r--  src/mongo/db/repl/replication_coordinator_mock.h | 4
-rw-r--r--  src/mongo/db/repl/replication_metrics.cpp | 121
-rw-r--r--  src/mongo/db/repl/replication_metrics.h | 28
-rw-r--r--  src/mongo/db/repl/replication_metrics.idl | 62
-rw-r--r--  src/mongo/db/repl/sync_tail.cpp | 4
-rw-r--r--  src/mongo/db/repl/topology_coordinator.cpp | 8
-rw-r--r--  src/mongo/db/repl/topology_coordinator.h | 1
-rw-r--r--  src/mongo/db/repl/topology_coordinator_v1_test.cpp | 7
-rw-r--r--  src/mongo/embedded/replication_coordinator_embedded.cpp | 6
-rw-r--r--  src/mongo/embedded/replication_coordinator_embedded.h | 4
27 files changed, 987 insertions, 38 deletions
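
For orientation before the per-file diffs: a minimal mongo shell sketch, assuming 'db' is connected to a replica-set member, showing where the catch-up counters touched by this change surface in serverStatus (field names are the ones the tests below assert on).

    // Hypothetical shell session against a replica-set member.
    const status = db.adminCommand({serverStatus: 1});
    const em = status.electionMetrics;
    printjson({
        numCatchUps: em.numCatchUps,
        numCatchUpsSucceeded: em.numCatchUpsSucceeded,
        numCatchUpsAlreadyCaughtUp: em.numCatchUpsAlreadyCaughtUp,
        numCatchUpsSkipped: em.numCatchUpsSkipped,
        numCatchUpsTimedOut: em.numCatchUpsTimedOut,
        numCatchUpsFailedWithError: em.numCatchUpsFailedWithError,
        numCatchUpsFailedWithNewTerm: em.numCatchUpsFailedWithNewTerm,
        numCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd:
            em.numCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd
    });
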
diff --git a/jstests/replsets/catchup.js b/jstests/replsets/catchup.js
index c28f5f59a31..9d90c51099c 100644
--- a/jstests/replsets/catchup.js
+++ b/jstests/replsets/catchup.js
@@ -5,6 +5,7 @@
load("jstests/libs/check_log.js");
load("jstests/libs/write_concern_util.js");
+load("jstests/replsets/libs/election_metrics.js");
load("jstests/replsets/rslib.js");
var name = "catch_up";
@@ -65,6 +66,7 @@ function stopReplicationAndEnforceNewPrimaryToCatchUp() {
assert.writeOK(oldPrimary.getDB("test").foo.insert({x: i}));
}
var latestOpOnOldPrimary = getLatestOp(oldPrimary);
+
// New primary wins immediately, but needs to catch up.
var newPrimary = stepUpNode(oldSecondaries[0]);
var latestOpOnNewPrimary = getLatestOp(newPrimary);
@@ -97,19 +99,102 @@ function reconfigElectionAndCatchUpTimeout(electionTimeout, catchupTimeout) {
rst.awaitReplication();
jsTest.log("Case 1: The primary is up-to-date after refreshing heartbeats.");
+let initialNewPrimaryStatus =
+ assert.commandWorked(rst.getSecondary().adminCommand({serverStatus: 1}));
+
// Should complete transition to primary immediately.
var newPrimary = stepUpNode(rst.getSecondary());
// Should win an election and finish the transition very quickly.
assert.eq(newPrimary, rst.getPrimary());
rst.awaitReplication();
+// Check that the 'numCatchUps' field has not been incremented in serverStatus.
+let newNewPrimaryStatus = assert.commandWorked(newPrimary.adminCommand({serverStatus: 1}));
+verifyServerStatusChange(
+ initialNewPrimaryStatus.electionMetrics, newNewPrimaryStatus.electionMetrics, 'numCatchUps', 0);
+// Check that the 'numCatchUpsAlreadyCaughtUp' field has been incremented in serverStatus, and
+// that none of the other reasons for catchup concluding has been incremented.
+verifyCatchUpConclusionReason(initialNewPrimaryStatus.electionMetrics,
+ newNewPrimaryStatus.electionMetrics,
+ 'numCatchUpsAlreadyCaughtUp');
+
+// Check that the 'electionCandidateMetrics' section of the replSetGetStatus response does not have
+// a 'targetCatchupOpTime' field if the target opTime for catchup is not set.
+let res = assert.commandWorked(newPrimary.adminCommand({replSetGetStatus: 1}));
+assert(res.electionCandidateMetrics,
+ () => "Response should have an 'electionCandidateMetrics' field: " + tojson(res));
+assert(!res.electionCandidateMetrics.targetCatchupOpTime,
+ () => "Response should not have an 'electionCandidateMetrics.targetCatchupOpTime' field: " +
+ tojson(res.electionCandidateMetrics));
+
+// Check that the 'electionCandidateMetrics' section of the replSetGetStatus response has a
+// 'numCatchUpOps' field once the primary is caught up, and that it has the correct value.
+assert(res.electionCandidateMetrics.numCatchUpOps,
+ () => "Response should have an 'electionCandidateMetrics.numCatchUpOps' field: " +
+ tojson(res.electionCandidateMetrics));
+assert.eq(res.electionCandidateMetrics.numCatchUpOps, 0);
+
jsTest.log("Case 2: The primary needs to catch up, succeeds in time.");
+initialNewPrimaryStatus =
+ assert.commandWorked(rst.getSecondaries()[0].adminCommand({serverStatus: 1}));
+
var stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp();
+// Check that the 'electionCandidateMetrics' section of the replSetGetStatus response does not have
+// a 'newTermStartDate' field before the transition to primary is complete.
+res = assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetGetStatus: 1}));
+assert(res.electionCandidateMetrics,
+ () => "Response should have an 'electionCandidateMetrics' field: " + tojson(res));
+assert(!res.electionCandidateMetrics.newTermStartDate,
+ () => "Response should not have an 'electionCandidateMetrics.newTermStartDate' field: " +
+ tojson(res.electionCandidateMetrics));
+
// Disable fail point to allow replication.
restartServerReplication(stepUpResults.oldSecondaries);
// getPrimary() blocks until the primary finishes drain mode.
assert.eq(stepUpResults.newPrimary, rst.getPrimary());
+
+// Wait until the new primary completes the transition to primary and writes a no-op.
+checkLog.contains(stepUpResults.newPrimary, "transition to primary complete");
+// Check that the new primary's term has been updated because of the no-op.
+assert.eq(getLatestOp(stepUpResults.newPrimary).t, stepUpResults.latestOpOnNewPrimary.t + 1);
+
+// Check that the 'electionCandidateMetrics' section of the replSetGetStatus response has a
+// 'newTermStartDate' field once the transition to primary is complete.
+res = assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetGetStatus: 1}));
+assert(res.electionCandidateMetrics,
+ () => "Response should have an 'electionCandidateMetrics' field: " + tojson(res));
+assert(res.electionCandidateMetrics.newTermStartDate,
+ () => "Response should have an 'electionCandidateMetrics.newTermStartDate' field: " +
+ tojson(res.electionCandidateMetrics));
+
+// Check that the 'numCatchUps' field has been incremented in serverStatus.
+newNewPrimaryStatus =
+ assert.commandWorked(stepUpResults.newPrimary.adminCommand({serverStatus: 1}));
+verifyServerStatusChange(
+ initialNewPrimaryStatus.electionMetrics, newNewPrimaryStatus.electionMetrics, 'numCatchUps', 1);
+// Check that the 'numCatchUpsSucceeded' field has been incremented in serverStatus, and that
+// none of the other reasons for catchup concluding has been incremented.
+verifyCatchUpConclusionReason(initialNewPrimaryStatus.electionMetrics,
+ newNewPrimaryStatus.electionMetrics,
+ 'numCatchUpsSucceeded');
+
+// Check that the 'electionCandidateMetrics' section of the replSetGetStatus response has
+// 'targetCatchupOpTime' and 'numCatchUpOps' fields once the primary is caught up, and that they
+// have the correct values.
+assert(res.electionCandidateMetrics.targetCatchupOpTime,
+ () => "Response should have an 'electionCandidateMetrics.targetCatchupOpTime' field: " +
+ tojson(res.electionCandidateMetrics));
+assert.eq(res.electionCandidateMetrics.targetCatchupOpTime.ts,
+ stepUpResults.latestOpOnOldPrimary.ts);
+assert.eq(res.electionCandidateMetrics.targetCatchupOpTime.t, stepUpResults.latestOpOnOldPrimary.t);
+assert(res.electionCandidateMetrics.numCatchUpOps,
+ () => "Response should have an 'electionCandidateMetrics.numCatchUpOps' field: " +
+ tojson(res.electionCandidateMetrics));
+// numCatchUpOps should be 4 because the 'foo' collection is implicitly created during the 3
+// inserts, and that's where the additional oplog entry comes from.
+assert.eq(res.electionCandidateMetrics.numCatchUpOps, 4);
+
// Wait for all secondaries to catch up
rst.awaitReplication();
// Check the latest op on old primary is preserved on the new one.
@@ -146,6 +231,9 @@ stepUpResults.oldPrimary.reconnect(stepUpResults.newPrimary);
rst.awaitReplication();
jsTest.log("Case 4: The primary needs to catch up, fails due to timeout.");
+initialNewPrimaryStatus =
+ assert.commandWorked(rst.getSecondaries()[0].adminCommand({serverStatus: 1}));
+
// Reconfig to make the catchup timeout shorter.
reconfigElectionAndCatchUpTimeout(conf.settings.electionTimeoutMillis, 10 * 1000);
@@ -155,6 +243,14 @@ checkLog.contains(stepUpResults.newPrimary, "Catchup timed out after becoming pr
restartServerReplication(stepUpResults.newPrimary);
assert.eq(stepUpResults.newPrimary, rst.getPrimary());
+// Check that the 'numCatchUpsTimedOut' field has been incremented in serverStatus, and that
+// none of the other reasons for catchup concluding has been incremented.
+newNewPrimaryStatus =
+ assert.commandWorked(stepUpResults.newPrimary.adminCommand({serverStatus: 1}));
+verifyCatchUpConclusionReason(initialNewPrimaryStatus.electionMetrics,
+ newNewPrimaryStatus.electionMetrics,
+ 'numCatchUpsTimedOut');
+
// Wait for the no-op "new primary" after winning an election, so that we know it has
// finished transition to primary.
assert.soon(function() {
@@ -171,9 +267,21 @@ jsTest.log("Case 5: The primary needs to catch up with no timeout, then gets abo
reconfigElectionAndCatchUpTimeout(conf.settings.electionTimeoutMillis, -1);
stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp();
+initialNewPrimaryStatus =
+ assert.commandWorked(stepUpResults.newPrimary.adminCommand({serverStatus: 1}));
+
// Abort catchup.
assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetAbortPrimaryCatchUp: 1}));
+// Check that the 'numCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd' field has been
+// incremented in serverStatus, and that none of the other reasons for catchup concluding has
+// been incremented.
+newNewPrimaryStatus =
+ assert.commandWorked(stepUpResults.newPrimary.adminCommand({serverStatus: 1}));
+verifyCatchUpConclusionReason(initialNewPrimaryStatus.electionMetrics,
+ newNewPrimaryStatus.electionMetrics,
+ 'numCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd');
+
// Wait for the no-op "new primary" after winning an election, so that we know it has
// finished transition to primary.
assert.soon(function() {
@@ -187,11 +295,22 @@ rst.awaitReplication();
checkOpInOplog(stepUpResults.newPrimary, stepUpResults.latestOpOnOldPrimary, 0);
jsTest.log("Case 6: The primary needs to catch up with no timeout, but steps down.");
+initialNewPrimaryStatus =
+ assert.commandWorked(rst.getSecondaries()[0].adminCommand({serverStatus: 1}));
+
var stepUpResults = stopReplicationAndEnforceNewPrimaryToCatchUp();
// Step-down command should abort catchup.
assert.commandWorked(stepUpResults.newPrimary.adminCommand({replSetStepDown: 60}));
+// Check that the 'numCatchUpsFailedWithError' field has been incremented in serverStatus, and
+// that none of the other reasons for catchup concluding has been incremented.
+newNewPrimaryStatus =
+ assert.commandWorked(stepUpResults.newPrimary.adminCommand({serverStatus: 1}));
+verifyCatchUpConclusionReason(initialNewPrimaryStatus.electionMetrics,
+ newNewPrimaryStatus.electionMetrics,
+ 'numCatchUpsFailedWithError');
+
// Rename the primary.
var steppedDownPrimary = stepUpResults.newPrimary;
var newPrimary = rst.getPrimary();
diff --git a/jstests/replsets/catchup_takeover_two_nodes_ahead.js b/jstests/replsets/catchup_takeover_two_nodes_ahead.js
index 6203889af88..31b78302329 100644
--- a/jstests/replsets/catchup_takeover_two_nodes_ahead.js
+++ b/jstests/replsets/catchup_takeover_two_nodes_ahead.js
@@ -33,8 +33,6 @@ var primary = replSet.getPrimary();
var writeConcern = {writeConcern: {w: 2, wtimeout: replSet.kDefaultTimeoutMS}};
assert.writeOK(primary.getDB(name).bar.insert({x: 100}, writeConcern));
-const initialPrimaryStatus = assert.commandWorked(primary.adminCommand({serverStatus: 1}));
-
// Write something so that node 0 is ahead of node 1.
stopServerReplication(nodes[1]);
writeConcern = {
@@ -42,6 +40,9 @@ writeConcern = {
};
assert.writeOK(primary.getDB(name).bar.insert({y: 100}, writeConcern));
+const initialPrimaryStatus = assert.commandWorked(primary.adminCommand({serverStatus: 1}));
+const initialNode2Status = assert.commandWorked(nodes[2].adminCommand({serverStatus: 1}));
+
// Step up one of the lagged nodes.
assert.commandWorked(nodes[2].adminCommand({replSetStepUp: 1}));
replSet.awaitNodesAgreeOnPrimary();
@@ -65,6 +66,13 @@ const newPrimaryStatus = assert.commandWorked(primary.adminCommand({serverStatus
verifyServerStatusElectionReasonCounterChange(
initialPrimaryStatus.electionMetrics, newPrimaryStatus.electionMetrics, "catchUpTakeover", 1);
+// Check that the 'numCatchUpsFailedWithNewTerm' field has been incremented in serverStatus, and
+// that none of the other reasons for catchup concluding has been incremented.
+const newNode2Status = assert.commandWorked(nodes[2].adminCommand({serverStatus: 1}));
+verifyCatchUpConclusionReason(initialNode2Status.electionMetrics,
+ newNode2Status.electionMetrics,
+ 'numCatchUpsFailedWithNewTerm');
+
// Wait until the old primary steps down so the connections won't be closed.
replSet.waitForState(2, ReplSetTest.State.SECONDARY, replSet.kDefaultTimeoutMS);
// Let the nodes catchup.
diff --git a/jstests/replsets/libs/election_metrics.js b/jstests/replsets/libs/election_metrics.js
index 180123a5371..36307b2117e 100644
--- a/jstests/replsets/libs/election_metrics.js
+++ b/jstests/replsets/libs/election_metrics.js
@@ -9,10 +9,11 @@
*/
function verifyServerStatusElectionReasonCounterValue(electionMetrics, fieldName, value) {
const field = electionMetrics[fieldName];
- assert.eq(field["called"], value, `expected the 'called' field of ${fieldName} to be ${value}`);
+ assert.eq(
+ field["called"], value, `expected the 'called' field of '${fieldName}' to be ${value}`);
assert.eq(field["successful"],
value,
- `expected the 'successful' field of ${fieldName} to be ${value}`);
+ `expected the 'successful' field of '${fieldName}' to be ${value}`);
}
/**
@@ -25,9 +26,45 @@ function verifyServerStatusElectionReasonCounterChange(
const newField = newElectionMetrics[fieldName];
assert.eq(initialField["called"] + expectedIncrement,
newField["called"],
- `expected the 'called' field of ${fieldName} to increase by ${expectedIncrement}`);
+ `expected the 'called' field of '${fieldName}' to increase by ${expectedIncrement}`);
assert.eq(
initialField["successful"] + expectedIncrement,
newField["successful"],
- `expected the 'successful' field of ${fieldName} to increase by ${expectedIncrement}`);
+ `expected the 'successful' field of '${fieldName}' to increase by ${expectedIncrement}`);
+}
+
+/**
+ * Verifies that the given field in serverStatus is incremented in the way we expect.
+ */
+function verifyServerStatusChange(initialStatus, newStatus, fieldName, expectedIncrement) {
+ assert.eq(initialStatus[fieldName] + expectedIncrement,
+ newStatus[fieldName],
+ `expected '${fieldName}' to increase by ${expectedIncrement}`);
+}
+
+/**
+ * Verifies that the given reason for primary catchup concluding is incremented in serverStatus, and
+ * that none of the other reasons are.
+ */
+function verifyCatchUpConclusionReason(initialStatus, newStatus, fieldName) {
+ const catchUpConclusionMetrics = [
+ "numCatchUpsSucceeded",
+ "numCatchUpsAlreadyCaughtUp",
+ "numCatchUpsSkipped",
+ "numCatchUpsTimedOut",
+ "numCatchUpsFailedWithError",
+ "numCatchUpsFailedWithNewTerm",
+ "numCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd"
+ ];
+
+ catchUpConclusionMetrics.forEach(function(metric) {
+ if (metric === fieldName) {
+ assert.eq(initialStatus[metric] + 1,
+ newStatus[metric],
+ `expected '${metric}' to increase by 1`);
+ } else {
+ assert.eq(
+ initialStatus[metric], newStatus[metric], `did not expect '${metric}' to increase`);
+ }
+ });
}
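
Usage sketch for the two helpers added above (the surrounding ReplSetTest setup is assumed and not shown): capture electionMetrics before the action under test, capture it again afterwards, and compare.

    // Sketch, assuming 'rst' is an initiated ReplSetTest and a catch-up just succeeded.
    load("jstests/replsets/libs/election_metrics.js");
    const before = assert.commandWorked(rst.getSecondary().adminCommand({serverStatus: 1}));
    // ... step the secondary up and let it finish catch-up ...
    const after = assert.commandWorked(rst.getPrimary().adminCommand({serverStatus: 1}));
    verifyServerStatusChange(before.electionMetrics, after.electionMetrics, 'numCatchUps', 1);
    verifyCatchUpConclusionReason(
        before.electionMetrics, after.electionMetrics, 'numCatchUpsSucceeded');
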
diff --git a/jstests/replsets/server_election_metrics.js b/jstests/replsets/server_election_metrics.js
index cb1ad5f1cf8..8e094492b6c 100644
--- a/jstests/replsets/server_election_metrics.js
+++ b/jstests/replsets/server_election_metrics.js
@@ -35,6 +35,15 @@ ${tojson(serverStatusResponse)}`));
verifyElectionReasonCounterFields(serverStatusResponse, "electionTimeout");
verifyElectionReasonCounterFields(serverStatusResponse, "freezeTimeout");
verifyElectionMetricsField(serverStatusResponse, "numStepDownsCausedByHigherTerm");
+ verifyElectionMetricsField(serverStatusResponse, "numCatchUps");
+ verifyElectionMetricsField(serverStatusResponse, "numCatchUpsSucceeded");
+ verifyElectionMetricsField(serverStatusResponse, "numCatchUpsAlreadyCaughtUp");
+ verifyElectionMetricsField(serverStatusResponse, "numCatchUpsSkipped");
+ verifyElectionMetricsField(serverStatusResponse, "numCatchUpsTimedOut");
+ verifyElectionMetricsField(serverStatusResponse, "numCatchUpsFailedWithError");
+ verifyElectionMetricsField(serverStatusResponse, "numCatchUpsFailedWithNewTerm");
+ verifyElectionMetricsField(serverStatusResponse,
+ "numCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd");
}
// Set up the replica set.
diff --git a/jstests/replsets/stepdown.js b/jstests/replsets/stepdown.js
index ecba409427f..6abdd335e92 100644
--- a/jstests/replsets/stepdown.js
+++ b/jstests/replsets/stepdown.js
@@ -2,10 +2,14 @@
* Check that on a loss of primary, another node doesn't assume primary if it is stale. We force a
* stepDown to test this.
*
+ * This test also checks that the serverStatus command metrics replSetStepDown and
+ * replSetStepDownWithForce are incremented correctly.
+ *
* This test requires the fsync command to force a secondary to be stale.
* @tags: [requires_fsync]
*/
+load("jstests/replsets/libs/election_metrics.js");
load("jstests/replsets/rslib.js");
// We are bypassing collection validation because this test runs "shutdown" command so the server is
@@ -60,6 +64,11 @@ try {
assert.writeOK(master.getDB("foo").bar.insert({x: i}));
}
+ let res = assert.commandWorked(master.adminCommand({replSetGetStatus: 1}));
+ assert(res.electionCandidateMetrics,
+ () => "Response should have an 'electionCandidateMetrics' field: " + tojson(res));
+ let initialServerStatus = assert.commandWorked(master.adminCommand({serverStatus: 1}));
+
jsTestLog('Do stepdown of primary ' + master + ' that should not work');
// this should fail, so we don't need to try/catch
@@ -67,15 +76,126 @@ try {
'Step down ' + master + ' expected error: ' +
tojson(assert.commandFailed(master.getDB("admin").runCommand({replSetStepDown: 10}))));
+ // Check that the 'total' and 'failed' fields of 'replSetStepDown' have been incremented in
+ // serverStatus and that they have not been incremented for 'replSetStepDownWithForce'.
+ let newServerStatus = assert.commandWorked(master.adminCommand({serverStatus: 1}));
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDown,
+ newServerStatus.metrics.commands.replSetStepDown,
+ "total",
+ 1);
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDown,
+ newServerStatus.metrics.commands.replSetStepDown,
+ "failed",
+ 1);
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDownWithForce,
+ newServerStatus.metrics.commands.replSetStepDownWithForce,
+ "total",
+ 0);
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDownWithForce,
+ newServerStatus.metrics.commands.replSetStepDownWithForce,
+ "failed",
+ 0);
+
+ // This section checks that the metrics are incremented accurately when the command fails due to
+ // an error occurring before stepDown is called in the replication coordinator, such as due to
+ // bad values or type mismatches in the arguments, or checkReplEnabledForCommand returning a bad
+ // status. The stepdown period being negative is one example of such an error, but success in
+ // this case gives us confidence that the behavior in the other cases is the same.
+
+ // Stepdown should fail because the stepdown period is negative
+ jsTestLog('Do stepdown of primary ' + master + ' that should not work');
+ assert.commandFailedWithCode(
+ master.getDB("admin").runCommand({replSetStepDown: -1, force: true}), ErrorCodes.BadValue);
+
+ // Check that the 'total' and 'failed' fields of 'replSetStepDown' and
+ // 'replSetStepDownWithForce' have been incremented in serverStatus.
+ initialServerStatus = newServerStatus;
+ newServerStatus = assert.commandWorked(master.adminCommand({serverStatus: 1}));
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDown,
+ newServerStatus.metrics.commands.replSetStepDown,
+ "total",
+ 1);
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDown,
+ newServerStatus.metrics.commands.replSetStepDown,
+ "failed",
+ 1);
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDownWithForce,
+ newServerStatus.metrics.commands.replSetStepDownWithForce,
+ "total",
+ 1);
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDownWithForce,
+ newServerStatus.metrics.commands.replSetStepDownWithForce,
+ "failed",
+ 1);
+
jsTestLog('Do stepdown of primary ' + master + ' that should work');
assert.commandWorked(
master.adminCommand({replSetStepDown: ReplSetTest.kDefaultTimeoutMS, force: true}));
+ // Check that the 'total' fields of 'replSetStepDown' and 'replSetStepDownWithForce' have been
+ // incremented in serverStatus and that their 'failed' fields have not been incremented.
+ initialServerStatus = newServerStatus;
+ newServerStatus = assert.commandWorked(master.adminCommand({serverStatus: 1}));
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDown,
+ newServerStatus.metrics.commands.replSetStepDown,
+ "total",
+ 1);
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDown,
+ newServerStatus.metrics.commands.replSetStepDown,
+ "failed",
+ 0);
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDownWithForce,
+ newServerStatus.metrics.commands.replSetStepDownWithForce,
+ "total",
+ 1);
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDownWithForce,
+ newServerStatus.metrics.commands.replSetStepDownWithForce,
+ "failed",
+ 0);
+
jsTestLog('Checking isMaster on ' + master);
var r2 = assert.commandWorked(master.getDB("admin").runCommand({ismaster: 1}));
jsTestLog('Result from running isMaster on ' + master + ': ' + tojson(r2));
assert.eq(r2.ismaster, false);
assert.eq(r2.secondary, true);
+
+ // Check that the 'electionCandidateMetrics' section of the replSetGetStatus response has been
+ // cleared, since the node is no longer primary.
+ res = assert.commandWorked(master.adminCommand({replSetGetStatus: 1}));
+ assert(!res.electionCandidateMetrics,
+ () => "Response should not have an 'electionCandidateMetrics' field: " + tojson(res));
+
+ // This section checks that the metrics are incremented accurately when the command fails due to
+ // an error while stepping down. This is one reason the replSetStepDown command could fail once
+ // we call stepDown in the replication coordinator, but success in this case gives us confidence
+ // that the behavior in the other cases is the same.
+
+ // Stepdown should fail because the node is no longer primary
+ jsTestLog('Do stepdown of primary ' + master + ' that should not work');
+ assert.commandFailedWithCode(master.getDB("admin").runCommand(
+ {replSetStepDown: ReplSetTest.kDefaultTimeoutMS, force: true}),
+ ErrorCodes.NotMaster);
+
+ // Check that the 'total' and 'failed' fields of 'replSetStepDown' and
+ // 'replSetStepDownWithForce' have been incremented in serverStatus.
+ initialServerStatus = newServerStatus;
+ newServerStatus = assert.commandWorked(master.adminCommand({serverStatus: 1}));
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDown,
+ newServerStatus.metrics.commands.replSetStepDown,
+ "total",
+ 1);
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDown,
+ newServerStatus.metrics.commands.replSetStepDown,
+ "failed",
+ 1);
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDownWithForce,
+ newServerStatus.metrics.commands.replSetStepDownWithForce,
+ "total",
+ 1);
+ verifyServerStatusChange(initialServerStatus.metrics.commands.replSetStepDownWithForce,
+ newServerStatus.metrics.commands.replSetStepDownWithForce,
+ "failed",
+ 1);
} catch (e) {
throw e;
} finally {
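
The counters asserted on above are ordinary serverStatus command metrics; a short sketch, assuming 'db' is connected to the primary, of reading them directly.

    const s = db.adminCommand({serverStatus: 1});
    printjson(s.metrics.commands.replSetStepDown);           // {failed: ..., total: ...}
    printjson(s.metrics.commands.replSetStepDownWithForce);  // moves only when {force: true} is passed
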
diff --git a/jstests/replsets/unconditional_step_down.js b/jstests/replsets/unconditional_step_down.js
index c9f95bcb1ac..4d5e37821db 100644
--- a/jstests/replsets/unconditional_step_down.js
+++ b/jstests/replsets/unconditional_step_down.js
@@ -106,6 +106,10 @@ function runStepDownTest({testMsg, stepDownFn, toRemovedState}) {
jsTestLog("Wait for write cmd to reach the fail point");
waitForCurOpByFailPoint(primaryDB, collNss, writeFailPoint);
+ let res = assert.commandWorked(primary.adminCommand({replSetGetStatus: 1}));
+ assert(res.electionCandidateMetrics,
+ () => "Response should have an 'electionCandidateMetrics' field: " + tojson(res));
+
jsTestLog("Trigger step down");
var oldConfig = stepDownFn();
@@ -119,6 +123,16 @@ function runStepDownTest({testMsg, stepDownFn, toRemovedState}) {
(toRemovedState) ? ReplSetTest.State.REMOVED : ReplSetTest.State.SECONDARY);
assert.commandWorked(primary.adminCommand({configureFailPoint: writeFailPoint, mode: "off"}));
+
+ // Check that the 'electionCandidateMetrics' section of the replSetGetStatus response has been
+ // cleared, since the node is no longer primary.
+ if (!toRemovedState) {
+ res = assert.commandWorked(primary.adminCommand({replSetGetStatus: 1}));
+ assert(
+ !res.electionCandidateMetrics,
+ () => "Response should not have an 'electionCandidateMetrics' field: " + tojson(res));
+ }
+
// Get the new primary.
refreshConnection();
}
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index 779fb0a3275..ce402e8c0b4 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -1711,6 +1711,7 @@ env.Library(
'$BUILD_DIR/mongo/db/op_observer',
'$BUILD_DIR/mongo/db/query_exec',
'$BUILD_DIR/mongo/db/repl/oplog_buffer_proxy',
+ '$BUILD_DIR/mongo/db/repl/replication_metrics',
'$BUILD_DIR/mongo/db/s/balancer',
'$BUILD_DIR/mongo/db/s/sharding_runtime_d',
'$BUILD_DIR/mongo/db/service_context',
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index 5b5b85c6af5..53d3e6a3612 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -300,7 +300,11 @@ void BackgroundSync::_produce() {
log() << "Our newest OpTime : " << lastOpTimeFetched;
log() << "Earliest OpTime available is " << syncSourceResp.earliestOpTimeSeen
<< " from " << syncSourceResp.getSyncSource();
- _replCoord->abortCatchupIfNeeded().transitional_ignore();
+ auto status = _replCoord->abortCatchupIfNeeded(
+ ReplicationCoordinator::PrimaryCatchUpConclusionReason::kFailedWithError);
+ if (!status.isOK()) {
+ LOG(1) << "Aborting catch-up failed with status: " << status;
+ }
return;
}
@@ -534,7 +538,11 @@ void BackgroundSync::_runRollback(OperationContext* opCtx,
StorageInterface* storageInterface) {
if (_replCoord->getMemberState().primary()) {
warning() << "Rollback situation detected in catch-up mode. Aborting catch-up mode.";
- _replCoord->abortCatchupIfNeeded().transitional_ignore();
+ auto status = _replCoord->abortCatchupIfNeeded(
+ ReplicationCoordinator::PrimaryCatchUpConclusionReason::kFailedWithError);
+ if (!status.isOK()) {
+ LOG(1) << "Aborting catch-up failed with status: " << status;
+ }
return;
}
diff --git a/src/mongo/db/repl/repl_set_commands.cpp b/src/mongo/db/repl/repl_set_commands.cpp
index a38d510fd5a..aa09a8a55d1 100644
--- a/src/mongo/db/repl/repl_set_commands.cpp
+++ b/src/mongo/db/repl/repl_set_commands.cpp
@@ -450,16 +450,31 @@ public:
"primary.)\n"
"http://dochub.mongodb.org/core/replicasetcommands";
}
- CmdReplSetStepDown() : ReplSetCommand("replSetStepDown") {}
+ CmdReplSetStepDown()
+ : ReplSetCommand("replSetStepDown"),
+ _stepDownCmdsWithForceExecutedMetric("commands.replSetStepDownWithForce.total",
+ &_stepDownCmdsWithForceExecuted),
+ _stepDownCmdsWithForceFailedMetric("commands.replSetStepDownWithForce.failed",
+ &_stepDownCmdsWithForceFailed) {}
virtual bool run(OperationContext* opCtx,
const string&,
const BSONObj& cmdObj,
BSONObjBuilder& result) {
+ const bool force = cmdObj["force"].trueValue();
+
+ if (force) {
+ _stepDownCmdsWithForceExecuted.increment();
+ }
+
+ auto onExitGuard = makeGuard([&] {
+ if (force) {
+ _stepDownCmdsWithForceFailed.increment();
+ }
+ });
+
Status status = ReplicationCoordinator::get(opCtx)->checkReplEnabledForCommand(&result);
uassertStatusOK(status);
- const bool force = cmdObj["force"].trueValue();
-
long long stepDownForSecs = cmdObj.firstElement().numberLong();
if (stepDownForSecs == 0) {
stepDownForSecs = 60;
@@ -498,10 +513,17 @@ public:
ReplicationCoordinator::get(opCtx)->stepDown(
opCtx, force, Seconds(secondaryCatchUpPeriodSecs), Seconds(stepDownForSecs));
+
+ onExitGuard.dismiss();
return true;
}
private:
+ mutable Counter64 _stepDownCmdsWithForceExecuted;
+ mutable Counter64 _stepDownCmdsWithForceFailed;
+ ServerStatusMetricField<Counter64> _stepDownCmdsWithForceExecutedMetric;
+ ServerStatusMetricField<Counter64> _stepDownCmdsWithForceFailedMetric;
+
ActionSet getAuthActionSet() const override {
return ActionSet{ActionType::replSetStateChange};
}
@@ -743,7 +765,9 @@ public:
uassertStatusOK(status);
log() << "Received replSetAbortPrimaryCatchUp request";
- status = ReplicationCoordinator::get(opCtx)->abortCatchupIfNeeded();
+ status = ReplicationCoordinator::get(opCtx)->abortCatchupIfNeeded(
+ ReplicationCoordinator::PrimaryCatchUpConclusionReason::
+ kFailedWithReplSetAbortPrimaryCatchUpCmd);
if (!status.isOK()) {
log() << "replSetAbortPrimaryCatchUp request failed" << causedBy(status);
}
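
With the change above, aborting catch-up via the command is recorded under its own conclusion reason. A sketch of observing that from the shell, assuming 'primary' is a connection to a node that is still in catch-up mode and that the election_metrics.js helpers are loaded.

    const before = assert.commandWorked(primary.adminCommand({serverStatus: 1}));
    assert.commandWorked(primary.adminCommand({replSetAbortPrimaryCatchUp: 1}));
    const after = assert.commandWorked(primary.adminCommand({serverStatus: 1}));
    verifyCatchUpConclusionReason(before.electionMetrics,
                                  after.electionMetrics,
                                  'numCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd');
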
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index 8b767924eac..2f8bd118552 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -863,10 +863,26 @@ public:
virtual ServiceContext* getServiceContext() = 0;
+ enum PrimaryCatchUpConclusionReason {
+ kSucceeded,
+ kAlreadyCaughtUp,
+ kSkipped,
+ kTimedOut,
+ kFailedWithError,
+ kFailedWithNewTerm,
+ kFailedWithReplSetAbortPrimaryCatchUpCmd
+ };
+
/**
* Abort catchup if the node is in catchup mode.
*/
- virtual Status abortCatchupIfNeeded() = 0;
+ virtual Status abortCatchupIfNeeded(PrimaryCatchUpConclusionReason reason) = 0;
+
+ /**
+ * Increment the counter for the number of ops applied during catchup if the node is in catchup
+ * mode.
+ */
+ virtual void incrementNumCatchUpOpsIfCatchingUp(int numOps) = 0;
/**
* Signals that drop pending collections have been removed from storage.
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index f87f0f0681a..ba98e84b2d0 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -69,6 +69,7 @@
#include "mongo/db/repl/repl_server_parameters_gen.h"
#include "mongo/db/repl/repl_settings.h"
#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/repl/replication_metrics.h"
#include "mongo/db/repl/replication_process.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/s/balancer/balancer.h"
@@ -488,6 +489,9 @@ OpTime ReplicationCoordinatorExternalStateImpl::onTransitionToPrimary(OperationC
fassert(28665, loadLastOpTimeAndWallTimeResult);
auto opTimeToReturn = loadLastOpTimeAndWallTimeResult.getValue().opTime;
+ auto newTermStartDate = loadLastOpTimeAndWallTimeResult.getValue().wallTime;
+ ReplicationMetrics::get(opCtx).setNewTermStartDate(newTermStartDate);
+
_shardingOnTransitionToPrimaryHook(opCtx);
_dropAllTempCollections(opCtx);
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 4f57c4ccedf..945b9983ea2 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -2110,6 +2110,10 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
lk.lock();
_updateAndLogStatsOnStepDown(&arsd);
+
+ // Clear the node's election candidate metrics since it is no longer primary.
+ ReplicationMetrics::get(opCtx).clearElectionCandidateMetrics();
+
_topCoord->finishUnconditionalStepDown();
onExitGuard.dismiss();
@@ -2374,6 +2378,9 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus(
}
}
+ BSONObj electionCandidateMetrics =
+ ReplicationMetrics::get(getServiceContext()).getElectionCandidateMetricsBSON();
+
stdx::lock_guard<stdx::mutex> lk(_mutex);
Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse");
_topCoord->prepareStatusResponse(
@@ -2382,6 +2389,7 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus(
static_cast<unsigned>(time(0) - serverGlobalParams.started),
_getCurrentCommittedSnapshotOpTimeAndWallTime_inlock(),
initialSyncProgress,
+ electionCandidateMetrics,
_storage->getLastStableCheckpointTimestampDeprecated(_service),
_storage->getLastStableRecoveryTimestamp(_service)},
response,
@@ -2687,6 +2695,9 @@ void ReplicationCoordinatorImpl::_finishReplSetReconfig(OperationContext* opCtx,
lk.lock();
_updateAndLogStatsOnStepDown(&arsd.get());
+
+ // Clear the node's election candidate metrics since it is no longer primary.
+ ReplicationMetrics::get(opCtx).clearElectionCandidateMetrics();
} else {
// Release the rstl lock as the node might have stepped down due to
// other unconditional step down code paths like learning new term via heartbeat &
@@ -2835,7 +2846,6 @@ ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator(WithLock l
_readWriteAbility->setCanAcceptNonLocalWrites(lk, opCtx, canAcceptWrites);
}
-
const MemberState newState = _topCoord->getMemberState();
if (newState == _memberState) {
if (_topCoord->getRole() == TopologyCoordinator::Role::kCandidate) {
@@ -2867,7 +2877,16 @@ ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator(WithLock l
// Exit catchup mode if we're in it and enable replication producer and applier on stepdown.
if (_memberState.primary()) {
if (_catchupState) {
- _catchupState->abort_inlock();
+ // _pendingTermUpdateDuringStepDown is set before stepping down due to hearing about a
+ // higher term, so that we can remember the term we heard and update our term as part of
+ // finishing stepdown. It is then unset toward the end of stepdown, after the function
+ // we are in is called. Thus we must be stepping down due to seeing a higher term if and
+ // only if _pendingTermUpdateDuringStepDown is set here.
+ if (_pendingTermUpdateDuringStepDown) {
+ _catchupState->abort_inlock(PrimaryCatchUpConclusionReason::kFailedWithNewTerm);
+ } else {
+ _catchupState->abort_inlock(PrimaryCatchUpConclusionReason::kFailedWithError);
+ }
}
_applierState = ApplierState::Running;
_externalState->startProducerIfStopped();
@@ -3011,7 +3030,7 @@ void ReplicationCoordinatorImpl::CatchupState::start_inlock() {
// No catchup in single node replica set.
if (_repl->_rsConfig.getNumMembers() == 1) {
- abort_inlock();
+ abort_inlock(PrimaryCatchUpConclusionReason::kSkipped);
return;
}
@@ -3020,7 +3039,7 @@ void ReplicationCoordinatorImpl::CatchupState::start_inlock() {
// When catchUpTimeoutMillis is 0, we skip doing catchup entirely.
if (catchupTimeout == ReplSetConfig::kCatchUpDisabled) {
log() << "Skipping primary catchup since the catchup timeout is 0.";
- abort_inlock();
+ abort_inlock(PrimaryCatchUpConclusionReason::kSkipped);
return;
}
@@ -3035,7 +3054,7 @@ void ReplicationCoordinatorImpl::CatchupState::start_inlock() {
return;
}
log() << "Catchup timed out after becoming primary.";
- abort_inlock();
+ abort_inlock(PrimaryCatchUpConclusionReason::kTimedOut);
};
// Deal with infinity and overflow - no timeout.
@@ -3048,15 +3067,20 @@ void ReplicationCoordinatorImpl::CatchupState::start_inlock() {
auto status = _repl->_replExecutor->scheduleWorkAt(timeoutDate, std::move(timeoutCB));
if (!status.isOK()) {
log() << "Failed to schedule catchup timeout work.";
- abort_inlock();
+ abort_inlock(PrimaryCatchUpConclusionReason::kFailedWithError);
return;
}
_timeoutCbh = status.getValue();
+
+ _numCatchUpOps = 0;
}
-void ReplicationCoordinatorImpl::CatchupState::abort_inlock() {
+void ReplicationCoordinatorImpl::CatchupState::abort_inlock(PrimaryCatchUpConclusionReason reason) {
invariant(_repl->_getMemberState_inlock().primary());
+ ReplicationMetrics::get(getGlobalServiceContext())
+ .incrementNumCatchUpsConcludedForReason(reason);
+
log() << "Exited primary catch-up mode.";
// Clean up its own members.
if (_timeoutCbh) {
@@ -3084,7 +3108,10 @@ void ReplicationCoordinatorImpl::CatchupState::signalHeartbeatUpdate_inlock() {
if (*targetOpTime <= myLastApplied) {
log() << "Caught up to the latest optime known via heartbeats after becoming primary. "
<< "Target optime: " << *targetOpTime << ". My Last Applied: " << myLastApplied;
- abort_inlock();
+ // Report the number of ops applied during catchup in replSetGetStatus once the primary is
+ // caught up.
+ ReplicationMetrics::get(getGlobalServiceContext()).setNumCatchUpOps(_numCatchUpOps);
+ abort_inlock(PrimaryCatchUpConclusionReason::kAlreadyCaughtUp);
return;
}
@@ -3093,6 +3120,8 @@ void ReplicationCoordinatorImpl::CatchupState::signalHeartbeatUpdate_inlock() {
return;
}
+ ReplicationMetrics::get(getGlobalServiceContext()).setTargetCatchupOpTime(targetOpTime.get());
+
log() << "Heartbeats updated catchup target optime to " << *targetOpTime;
log() << "Latest known optime per replica set member:";
auto opTimesPerMember = _repl->_topCoord->latestKnownOpTimeSinceHeartbeatRestartPerMember();
@@ -3103,29 +3132,49 @@ void ReplicationCoordinatorImpl::CatchupState::signalHeartbeatUpdate_inlock() {
if (_waiter) {
_repl->_opTimeWaiterList.remove_inlock(_waiter.get());
+ } else {
+ // Only increment the 'numCatchUps' election metric the first time we add a waiter, so that
+ // we only increment it once each time a primary has to catch up. If there is already an
+ // existing waiter, then the node is catching up and has already been counted.
+ ReplicationMetrics::get(getGlobalServiceContext()).incrementNumCatchUps();
}
+
auto targetOpTimeCB = [this, targetOpTime]() {
// Double check the target time since stepdown may signal us too.
const auto myLastApplied = _repl->_getMyLastAppliedOpTime_inlock();
if (*targetOpTime <= myLastApplied) {
log() << "Caught up to the latest known optime successfully after becoming primary. "
<< "Target optime: " << *targetOpTime << ". My Last Applied: " << myLastApplied;
- abort_inlock();
+ // Report the number of ops applied during catchup in replSetGetStatus once the primary
+ // is caught up.
+ ReplicationMetrics::get(getGlobalServiceContext()).setNumCatchUpOps(_numCatchUpOps);
+ abort_inlock(PrimaryCatchUpConclusionReason::kSucceeded);
}
};
_waiter = stdx::make_unique<CallbackWaiter>(*targetOpTime, targetOpTimeCB);
_repl->_opTimeWaiterList.add_inlock(_waiter.get());
}
-Status ReplicationCoordinatorImpl::abortCatchupIfNeeded() {
+void ReplicationCoordinatorImpl::CatchupState::incrementNumCatchUpOps_inlock(int numOps) {
+ _numCatchUpOps += numOps;
+}
+
+Status ReplicationCoordinatorImpl::abortCatchupIfNeeded(PrimaryCatchUpConclusionReason reason) {
stdx::lock_guard<stdx::mutex> lk(_mutex);
if (_catchupState) {
- _catchupState->abort_inlock();
+ _catchupState->abort_inlock(reason);
return Status::OK();
}
return Status(ErrorCodes::IllegalOperation, "The node is not in catch-up mode.");
}
+void ReplicationCoordinatorImpl::incrementNumCatchUpOpsIfCatchingUp(int numOps) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ if (_catchupState) {
+ _catchupState->incrementNumCatchUpOps_inlock(numOps);
+ }
+}
+
void ReplicationCoordinatorImpl::signalDropPendingCollectionsRemovedFromStorage() {
stdx::lock_guard<stdx::mutex> lock(_mutex);
_wakeReadyWaiters_inlock();
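
A sketch of how the replSetGetStatus reporting added above looks from the shell, assuming 'db' is connected to the candidate/primary; the section is cleared again once the node steps down.

    const res = assert.commandWorked(db.adminCommand({replSetGetStatus: 1}));
    // Present only while the node is a candidate or primary.
    if (res.electionCandidateMetrics) {
        // targetCatchupOpTime appears only if heartbeats set a catch-up target; numCatchUpOps and
        // newTermStartDate are filled in as catch-up and the transition to primary complete.
        printjson(res.electionCandidateMetrics);
    }
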
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 24d530fbf4e..57c31f707aa 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -321,7 +321,9 @@ public:
virtual Status stepUpIfEligible(bool skipDryRun) override;
- virtual Status abortCatchupIfNeeded() override;
+ virtual Status abortCatchupIfNeeded(PrimaryCatchUpConclusionReason reason) override;
+
+ virtual void incrementNumCatchUpOpsIfCatchingUp(int numOps) override;
void signalDropPendingCollectionsRemovedFromStorage() final;
@@ -669,9 +671,11 @@ private:
// start() can only be called once.
void start_inlock();
// Reset the state itself to destruct the state.
- void abort_inlock();
+ void abort_inlock(PrimaryCatchUpConclusionReason reason);
// Heartbeat calls this function to update the target optime.
void signalHeartbeatUpdate_inlock();
+ // Increment the counter for the number of ops applied during catchup.
+ void incrementNumCatchUpOps_inlock(int numOps);
private:
ReplicationCoordinatorImpl* _repl; // Not owned.
@@ -680,6 +684,8 @@ private:
// Handle to a Waiter that contains the current target optime to reach after which
// we can exit catchup mode.
std::unique_ptr<CallbackWaiter> _waiter;
+ // Counter for the number of ops applied during catchup.
+ int _numCatchUpOps;
};
// Inner class to manage the concurrency of _canAcceptNonLocalWrites and _canServeNonLocalReads.
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
index 2b7d7bd62f9..ea12a516ba1 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
@@ -64,6 +64,10 @@ public:
if (_replCoord->_electionFinishedEvent.isValid()) {
_replCoord->_replExecutor->signalEvent(_replCoord->_electionFinishedEvent);
}
+
+ // Clear the node's election candidate metrics if it loses either the dry-run or actual
+ // election, since it will not become primary.
+ ReplicationMetrics::get(getGlobalServiceContext()).clearElectionCandidateMetrics();
}
void dismiss() {
@@ -141,6 +145,9 @@ void ReplicationCoordinatorImpl::_startElectSelfV1_inlock(
long long term = _topCoord->getTerm();
int primaryIndex = -1;
+ Date_t now = _replExecutor->now();
+ ReplicationMetrics::get(getServiceContext()).setElectionCandidateMetrics(now);
+
if (reason == TopologyCoordinator::StartElectionReason::kStepUpRequestSkipDryRun) {
long long newTerm = term + 1;
log() << "skipping dry run and running for election in term " << newTerm;
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
index b1b4d4879b5..f54883200ea 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1_test.cpp
@@ -48,6 +48,8 @@
#include "mongo/util/fail_point_service.h"
#include "mongo/util/log.h"
+#include <boost/optional/optional_io.hpp>
+
namespace mongo {
namespace repl {
namespace {
@@ -221,6 +223,20 @@ TEST_F(ReplCoordTest, ElectionSucceedsWhenNodeIsTheOnlyNode) {
getReplCoord()->fillIsMasterForReplSet(&imResponse, {});
ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();
+
+ // Check that only the 'numCatchUpsSkipped' primary catchup conclusion reason was incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtxPtr.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtxPtr.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtxPtr.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtxPtr.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtxPtr.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtxPtr.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtxPtr.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
}
TEST_F(ReplCoordTest, ElectionSucceedsWhenAllNodesVoteYea) {
@@ -330,6 +346,10 @@ TEST_F(ReplCoordTest, ElectionFailsWhenInsufficientVotesAreReceivedDuringDryRun)
simulateEnoughHeartbeatsForAllNodesUp();
+ // Check that the node's election candidate metrics are unset before it becomes primary.
+ ASSERT_BSONOBJ_EQ(
+ BSONObj(), ReplicationMetrics::get(getServiceContext()).getElectionCandidateMetricsBSON());
+
auto electionTimeoutWhen = getReplCoord()->getElectionTimeout_forTest();
ASSERT_NOT_EQUALS(Date_t(), electionTimeoutWhen);
log() << "Election timeout scheduled at " << electionTimeoutWhen << " (simulator time)";
@@ -354,6 +374,11 @@ TEST_F(ReplCoordTest, ElectionFailsWhenInsufficientVotesAreReceivedDuringDryRun)
<< false << "reason"
<< "don't like him much")));
voteRequests++;
+ // Check that the node's election candidate metrics are set once it has called an
+ // election.
+ ASSERT_BSONOBJ_NE(
+ BSONObj(),
+ ReplicationMetrics::get(getServiceContext()).getElectionCandidateMetricsBSON());
} else {
net->blackHole(noi);
}
@@ -363,6 +388,11 @@ TEST_F(ReplCoordTest, ElectionFailsWhenInsufficientVotesAreReceivedDuringDryRun)
stopCapturingLogMessages();
ASSERT_EQUALS(
1, countLogLinesContaining("not running for primary, we received insufficient votes"));
+
+ // Check that the node's election candidate metrics have been cleared, since it lost the dry-run
+ // election and will not become primary.
+ ASSERT_BSONOBJ_EQ(
+ BSONObj(), ReplicationMetrics::get(getServiceContext()).getElectionCandidateMetricsBSON());
}
TEST_F(ReplCoordTest, ElectionFailsWhenDryRunResponseContainsANewerTerm) {
@@ -653,9 +683,17 @@ TEST_F(ReplCoordTest, ElectionFailsWhenVoteRequestResponseContainsANewerTerm) {
replCoordSetMyLastDurableOpTime(time1, Date_t() + Seconds(time1.getSecs()));
ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ // Check that the node's election candidate metrics are unset before it becomes primary.
+ ASSERT_BSONOBJ_EQ(
+ BSONObj(), ReplicationMetrics::get(getServiceContext()).getElectionCandidateMetricsBSON());
+
simulateEnoughHeartbeatsForAllNodesUp();
simulateSuccessfulDryRun();
+ // Check that the node's election candidate metrics are set once it has called an election.
+ ASSERT_BSONOBJ_NE(
+ BSONObj(), ReplicationMetrics::get(getServiceContext()).getElectionCandidateMetricsBSON());
+
NetworkInterfaceMock* net = getNet();
net->enterNetwork();
while (net->hasReadyRequests()) {
@@ -680,6 +718,11 @@ TEST_F(ReplCoordTest, ElectionFailsWhenVoteRequestResponseContainsANewerTerm) {
stopCapturingLogMessages();
ASSERT_EQUALS(1,
countLogLinesContaining("not becoming primary, we have been superseded already"));
+
+ // Check that the node's election candidate metrics have been cleared, since it lost the actual
+ // election and will not become primary.
+ ASSERT_BSONOBJ_EQ(
+ BSONObj(), ReplicationMetrics::get(getServiceContext()).getElectionCandidateMetricsBSON());
}
TEST_F(ReplCoordTest, ElectionFailsWhenTermChangesDuringDryRun) {
@@ -2199,6 +2242,25 @@ TEST_F(PrimaryCatchUpTest, PrimaryDoesNotNeedToCatchUp) {
signalDrainComplete(opCtx.get());
Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
+
+ // Check that the number of elections requiring primary catchup was not incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Check that only the 'numCatchUpsAlreadyCaughtUp' primary catchup conclusion reason was
+ // incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
+
+ // Check that the targetCatchupOpTime metric was not set.
+ ASSERT_EQUALS(boost::none,
+ ReplicationMetrics::get(getServiceContext()).getTargetCatchupOpTime_forTesting());
}
// Heartbeats set a future target OpTime and we reached that successfully.
@@ -2208,11 +2270,23 @@ TEST_F(PrimaryCatchUpTest, CatchupSucceeds) {
OpTime time1(Timestamp(100, 1), 0);
OpTime time2(Timestamp(100, 2), 0);
ReplSetConfig config = setUp3NodeReplSetAndRunForElection(time1);
+
+ // Check that the targetCatchupOpTime metric is unset before the target opTime for catchup is
+ // set.
+ ASSERT_EQUALS(boost::none,
+ ReplicationMetrics::get(getServiceContext()).getTargetCatchupOpTime_forTesting());
+
processHeartbeatRequests([this, time2](const NetworkOpIter noi) {
auto net = getNet();
// The old primary accepted one more op and all nodes caught up after voting for me.
net->scheduleResponse(noi, net->now(), makeHeartbeatResponse(time2));
});
+
+ // Check that the targetCatchupOpTime metric was set correctly when heartbeats updated the
+ // target opTime for catchup.
+ ASSERT_EQUALS(time2,
+ ReplicationMetrics::get(getServiceContext()).getTargetCatchupOpTime_forTesting());
+
ASSERT(getReplCoord()->getApplierState() == ApplierState::Running);
advanceMyLastAppliedOpTime(time2, Date_t() + Seconds(time2.getSecs()));
ASSERT(getReplCoord()->getApplierState() == ApplierState::Draining);
@@ -2222,6 +2296,20 @@ TEST_F(PrimaryCatchUpTest, CatchupSucceeds) {
signalDrainComplete(opCtx.get());
Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
+
+ // Check that the number of elections requiring primary catchup was incremented.
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Check that only the 'numCatchUpsSucceeded' primary catchup conclusion reason was incremented.
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
}
TEST_F(PrimaryCatchUpTest, CatchupTimeout) {
@@ -2242,6 +2330,20 @@ TEST_F(PrimaryCatchUpTest, CatchupTimeout) {
signalDrainComplete(opCtx.get());
Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
+
+ // Check that the number of elections requiring primary catchup was incremented.
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Check that only the 'numCatchUpsTimedOut' primary catchup conclusion reason was incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
}
TEST_F(PrimaryCatchUpTest, CannotSeeAllNodes) {
@@ -2267,6 +2369,21 @@ TEST_F(PrimaryCatchUpTest, CannotSeeAllNodes) {
signalDrainComplete(opCtx.get());
Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
+
+ // Check that the number of elections requiring primary catchup was not incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Check that only the 'numCatchUpsAlreadyCaughtUp' primary catchup conclusion reason was
+ // incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
}
TEST_F(PrimaryCatchUpTest, HeartbeatTimeout) {
@@ -2292,6 +2409,21 @@ TEST_F(PrimaryCatchUpTest, HeartbeatTimeout) {
signalDrainComplete(opCtx.get());
Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
+
+ // Check that the number of elections requiring primary catchup was not incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Check that only the 'numCatchUpsAlreadyCaughtUp' primary catchup conclusion reason was
+ // incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
}
TEST_F(PrimaryCatchUpTest, PrimaryStepsDownBeforeHeartbeatRefreshing) {
@@ -2315,6 +2447,21 @@ TEST_F(PrimaryCatchUpTest, PrimaryStepsDownBeforeHeartbeatRefreshing) {
auto opCtx = makeOperationContext();
Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_FALSE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
+
+ // Check that the number of elections requiring primary catchup was not incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Since the primary stepped down in catchup mode because it saw a higher term, check that only
+ // the 'numCatchUpsFailedWithNewTerm' primary catchup conclusion reason was incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
}
TEST_F(PrimaryCatchUpTest, PrimaryStepsDownDuringCatchUp) {
@@ -2329,6 +2476,10 @@ TEST_F(PrimaryCatchUpTest, PrimaryStepsDownDuringCatchUp) {
// Other nodes are ahead of me.
getNet()->scheduleResponse(noi, getNet()->now(), makeHeartbeatResponse(time2));
});
+
+ ASSERT_EQUALS(time2,
+ ReplicationMetrics::get(getServiceContext()).getTargetCatchupOpTime_forTesting());
+
ASSERT(getReplCoord()->getApplierState() == ApplierState::Running);
TopologyCoordinator::UpdateTermResult updateTermResult;
auto evh = getReplCoord()->updateTerm_forTest(2, &updateTermResult);
@@ -2344,6 +2495,25 @@ TEST_F(PrimaryCatchUpTest, PrimaryStepsDownDuringCatchUp) {
auto opCtx = makeOperationContext();
Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_FALSE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
+
+ // Check that the number of elections requiring primary catchup was incremented.
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Since the primary stepped down in catchup mode because it saw a higher term, check that only
+ // the 'numCatchUpsFailedWithNewTerm' primary catchup conclusion reason was incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
+
+ // Check that the targetCatchupOpTime metric was cleared when the node stepped down.
+ ASSERT_EQUALS(boost::none,
+ ReplicationMetrics::get(getServiceContext()).getTargetCatchupOpTime_forTesting());
}
TEST_F(PrimaryCatchUpTest, PrimaryStepsDownDuringDrainMode) {
@@ -2365,6 +2535,21 @@ TEST_F(PrimaryCatchUpTest, PrimaryStepsDownDuringDrainMode) {
stopCapturingLogMessages();
ASSERT_EQUALS(1, countLogLinesContaining("Caught up to the latest"));
+ // Check that the number of elections requiring primary catchup was incremented.
+ auto opCtx = makeOperationContext();
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Check that only the 'numCatchUpsSucceeded' primary catchup conclusion reason was incremented.
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
+
// Step down during drain mode.
TopologyCoordinator::UpdateTermResult updateTermResult;
auto evh = replCoord->updateTerm_forTest(2, &updateTermResult);
@@ -2383,7 +2568,6 @@ TEST_F(PrimaryCatchUpTest, PrimaryStepsDownDuringDrainMode) {
net->scheduleResponse(noi, net->now(), makeHeartbeatResponse(time2));
});
ASSERT(replCoord->getApplierState() == ApplierState::Draining);
- auto opCtx = makeOperationContext();
{
Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_FALSE(replCoord->canAcceptWritesForDatabase(opCtx.get(), "test"));
@@ -2392,6 +2576,21 @@ TEST_F(PrimaryCatchUpTest, PrimaryStepsDownDuringDrainMode) {
Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT(replCoord->getApplierState() == ApplierState::Stopped);
ASSERT_TRUE(replCoord->canAcceptWritesForDatabase(opCtx.get(), "test"));
+
+ // Check that the number of elections requiring primary catchup was not incremented again.
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Check that only the 'numCatchUpsAlreadyCaughtUp' primary catchup conclusion reason was
+ // incremented this time; 'numCatchUpsSucceeded' remains 1 from the successful catchup earlier
+ // in this test.
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
}
TEST_F(PrimaryCatchUpTest, FreshestNodeBecomesAvailableLater) {
@@ -2420,6 +2619,8 @@ TEST_F(PrimaryCatchUpTest, FreshestNodeBecomesAvailableLater) {
ASSERT(getReplCoord()->getApplierState() == ApplierState::Running);
stopCapturingLogMessages();
ASSERT_EQ(1, countLogLinesContaining("Heartbeats updated catchup target optime"));
+ ASSERT_EQUALS(time3,
+ ReplicationMetrics::get(getServiceContext()).getTargetCatchupOpTime_forTesting());
// 3) Advancing its applied optime to time 2 isn't enough.
advanceMyLastAppliedOpTime(time2, Date_t() + Seconds(time2.getSecs()));
@@ -2440,6 +2641,8 @@ TEST_F(PrimaryCatchUpTest, FreshestNodeBecomesAvailableLater) {
ASSERT(getReplCoord()->getApplierState() == ApplierState::Running);
stopCapturingLogMessages();
ASSERT_EQ(1, countLogLinesContaining("Heartbeats updated catchup target optime"));
+ ASSERT_EQUALS(time4,
+ ReplicationMetrics::get(getServiceContext()).getTargetCatchupOpTime_forTesting());
// 5) Advancing to time 3 isn't enough now.
advanceMyLastAppliedOpTime(time3, Date_t() + Seconds(time3.getSecs()));
@@ -2455,6 +2658,20 @@ TEST_F(PrimaryCatchUpTest, FreshestNodeBecomesAvailableLater) {
signalDrainComplete(opCtx.get());
Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
+
+ // Check that the number of elections requiring primary catchup was incremented.
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Check that only the 'numCatchUpsSucceeded' primary catchup conclusion reason was incremented.
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
}
TEST_F(PrimaryCatchUpTest, InfiniteTimeoutAndAbort) {
@@ -2489,7 +2706,9 @@ TEST_F(PrimaryCatchUpTest, InfiniteTimeoutAndAbort) {
ASSERT(getReplCoord()->getApplierState() == ApplierState::Running);
// Simulate a user initiated abort.
- ASSERT_OK(getReplCoord()->abortCatchupIfNeeded());
+ ASSERT_OK(getReplCoord()->abortCatchupIfNeeded(
+ ReplicationCoordinator::PrimaryCatchUpConclusionReason::
+ kFailedWithReplSetAbortPrimaryCatchUpCmd));
ASSERT(getReplCoord()->getApplierState() == ApplierState::Draining);
stopCapturingLogMessages();
@@ -2500,6 +2719,21 @@ TEST_F(PrimaryCatchUpTest, InfiniteTimeoutAndAbort) {
signalDrainComplete(opCtx.get());
Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
+
+ // Check that the number of elections requiring primary catchup was incremented.
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Check that only the 'numCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd' primary catchup
+ // conclusion reason was incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(1,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
}
TEST_F(PrimaryCatchUpTest, ZeroTimeout) {
@@ -2514,6 +2748,60 @@ TEST_F(PrimaryCatchUpTest, ZeroTimeout) {
signalDrainComplete(opCtx.get());
Lock::GlobalLock lock(opCtx.get(), MODE_IX);
ASSERT_TRUE(getReplCoord()->canAcceptWritesForDatabase(opCtx.get(), "test"));
+
+ // Check that the number of elections requiring primary catchup was not incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Check that only the 'numCatchUpsSkipped' primary catchup conclusion reason was incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
+}
+
+TEST_F(PrimaryCatchUpTest, CatchUpFailsDueToPrimaryStepDown) {
+ startCapturingLogMessages();
+
+ OpTime time1(Timestamp(100, 1), 0);
+ OpTime time2(Timestamp(100, 2), 0);
+ ReplSetConfig config = setUp3NodeReplSetAndRunForElection(time1);
+ // Step down in the middle of catchup.
+ auto abortTime = getNet()->now() + config.getCatchUpTimeoutPeriod() / 2;
+ replyHeartbeatsAndRunUntil(abortTime, [this, time2](const NetworkOpIter noi) {
+ // Other nodes are ahead of me.
+ getNet()->scheduleResponse(noi, getNet()->now(), makeHeartbeatResponse(time2));
+ });
+ ASSERT(getReplCoord()->getApplierState() == ApplierState::Running);
+
+ auto opCtx = makeOperationContext();
+ getReplCoord()->stepDown(opCtx.get(), true, Milliseconds(0), Milliseconds(1000));
+ ASSERT_TRUE(getReplCoord()->getMemberState().secondary());
+ ASSERT(getReplCoord()->getApplierState() == ApplierState::Running);
+
+ stopCapturingLogMessages();
+ ASSERT_EQUALS(1, countLogLinesContaining("Exited primary catch-up mode"));
+ ASSERT_EQUALS(0, countLogLinesContaining("Caught up to the latest"));
+ ASSERT_EQUALS(0, countLogLinesContaining("Catchup timed out"));
+
+ // Check that the number of elections requiring primary catchup was incremented.
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUps_forTesting());
+
+ // Check that only the 'numCatchUpsFailedWithError' primary catchup conclusion reason was
+ // incremented.
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSucceeded_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsAlreadyCaughtUp_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsSkipped_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsTimedOut_forTesting());
+ ASSERT_EQ(1, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithError_forTesting());
+ ASSERT_EQ(0, ReplicationMetrics::get(opCtx.get()).getNumCatchUpsFailedWithNewTerm_forTesting());
+ ASSERT_EQ(0,
+ ReplicationMetrics::get(opCtx.get())
+ .getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting());
}
} // namespace
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 59276c9093b..309c5e2e708 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -48,6 +48,7 @@
#include "mongo/db/repl/repl_set_heartbeat_args_v1.h"
#include "mongo/db/repl/repl_set_heartbeat_response.h"
#include "mongo/db/repl/replication_coordinator_impl.h"
+#include "mongo/db/repl/replication_metrics.h"
#include "mongo/db/repl/replication_process.h"
#include "mongo/db/repl/topology_coordinator.h"
#include "mongo/db/repl/vote_requester.h"
@@ -424,6 +425,10 @@ void ReplicationCoordinatorImpl::_stepDownFinish(
lk.lock();
_updateAndLogStatsOnStepDown(&arsd);
+
+ // Clear the node's election candidate metrics since it is no longer primary.
+ ReplicationMetrics::get(opCtx.get()).clearElectionCandidateMetrics();
+
_topCoord->finishUnconditionalStepDown();
const auto action = _updateMemberStateFromTopologyCoordinator(lk, opCtx.get());
@@ -646,6 +651,9 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
lk.lock();
_updateAndLogStatsOnStepDown(&arsd.get());
+
+ // Clear the node's election candidate metrics since it is no longer primary.
+ ReplicationMetrics::get(opCtx.get()).clearElectionCandidateMetrics();
} else {
// Release the rstl lock as the node might have stepped down due to
// other unconditional step down code paths like learning new term via heartbeat &
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index 32589b56952..cd8bfb9dc3a 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -529,10 +529,14 @@ void ReplicationCoordinatorMock::alwaysAllowWrites(bool allowWrites) {
_alwaysAllowWrites = allowWrites;
}
-Status ReplicationCoordinatorMock::abortCatchupIfNeeded() {
+Status ReplicationCoordinatorMock::abortCatchupIfNeeded(PrimaryCatchUpConclusionReason reason) {
return Status::OK();
}
+void ReplicationCoordinatorMock::incrementNumCatchUpOpsIfCatchingUp(int numOps) {
+ return;
+}
+
void ReplicationCoordinatorMock::signalDropPendingCollectionsRemovedFromStorage() {}
boost::optional<Timestamp> ReplicationCoordinatorMock::getRecoveryTimestamp() {
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 67f7d2cead8..cb3bab9e157 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -302,7 +302,9 @@ public:
return _service;
}
- virtual Status abortCatchupIfNeeded() override;
+ virtual Status abortCatchupIfNeeded(PrimaryCatchUpConclusionReason reason) override;
+
+ virtual void incrementNumCatchUpOpsIfCatchingUp(int numOps) override;
void signalDropPendingCollectionsRemovedFromStorage() final;
diff --git a/src/mongo/db/repl/replication_metrics.cpp b/src/mongo/db/repl/replication_metrics.cpp
index 8cdc1405827..b6fb3289f35 100644
--- a/src/mongo/db/repl/replication_metrics.cpp
+++ b/src/mongo/db/repl/replication_metrics.cpp
@@ -128,6 +128,45 @@ void ReplicationMetrics::incrementNumStepDownsCausedByHigherTerm() {
_electionMetrics.getNumStepDownsCausedByHigherTerm() + 1);
}
+void ReplicationMetrics::incrementNumCatchUps() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _electionMetrics.setNumCatchUps(_electionMetrics.getNumCatchUps() + 1);
+}
+
+void ReplicationMetrics::incrementNumCatchUpsConcludedForReason(
+ ReplicationCoordinator::PrimaryCatchUpConclusionReason reason) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ switch (reason) {
+ case ReplicationCoordinator::PrimaryCatchUpConclusionReason::kSucceeded:
+ _electionMetrics.setNumCatchUpsSucceeded(_electionMetrics.getNumCatchUpsSucceeded() +
+ 1);
+ break;
+ case ReplicationCoordinator::PrimaryCatchUpConclusionReason::kAlreadyCaughtUp:
+ _electionMetrics.setNumCatchUpsAlreadyCaughtUp(
+ _electionMetrics.getNumCatchUpsAlreadyCaughtUp() + 1);
+ break;
+ case ReplicationCoordinator::PrimaryCatchUpConclusionReason::kSkipped:
+ _electionMetrics.setNumCatchUpsSkipped(_electionMetrics.getNumCatchUpsSkipped() + 1);
+ break;
+ case ReplicationCoordinator::PrimaryCatchUpConclusionReason::kTimedOut:
+ _electionMetrics.setNumCatchUpsTimedOut(_electionMetrics.getNumCatchUpsTimedOut() + 1);
+ break;
+ case ReplicationCoordinator::PrimaryCatchUpConclusionReason::kFailedWithError:
+ _electionMetrics.setNumCatchUpsFailedWithError(
+ _electionMetrics.getNumCatchUpsFailedWithError() + 1);
+ break;
+ case ReplicationCoordinator::PrimaryCatchUpConclusionReason::kFailedWithNewTerm:
+ _electionMetrics.setNumCatchUpsFailedWithNewTerm(
+ _electionMetrics.getNumCatchUpsFailedWithNewTerm() + 1);
+ break;
+ case ReplicationCoordinator::PrimaryCatchUpConclusionReason::
+ kFailedWithReplSetAbortPrimaryCatchUpCmd:
+ _electionMetrics.setNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd(
+ _electionMetrics.getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd() + 1);
+ break;
+ }
+}
+
int ReplicationMetrics::getNumStepUpCmdsCalled_forTesting() {
stdx::lock_guard<stdx::mutex> lk(_mutex);
return _electionMetrics.getStepUpCmd().getCalled();
@@ -183,11 +222,93 @@ int ReplicationMetrics::getNumStepDownsCausedByHigherTerm_forTesting() {
return _electionMetrics.getNumStepDownsCausedByHigherTerm();
}
+int ReplicationMetrics::getNumCatchUps_forTesting() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _electionMetrics.getNumCatchUps();
+}
+
+int ReplicationMetrics::getNumCatchUpsSucceeded_forTesting() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _electionMetrics.getNumCatchUpsSucceeded();
+}
+
+int ReplicationMetrics::getNumCatchUpsAlreadyCaughtUp_forTesting() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _electionMetrics.getNumCatchUpsAlreadyCaughtUp();
+}
+
+int ReplicationMetrics::getNumCatchUpsSkipped_forTesting() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _electionMetrics.getNumCatchUpsSkipped();
+}
+
+int ReplicationMetrics::getNumCatchUpsTimedOut_forTesting() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _electionMetrics.getNumCatchUpsTimedOut();
+}
+
+int ReplicationMetrics::getNumCatchUpsFailedWithError_forTesting() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _electionMetrics.getNumCatchUpsFailedWithError();
+}
+
+int ReplicationMetrics::getNumCatchUpsFailedWithNewTerm_forTesting() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _electionMetrics.getNumCatchUpsFailedWithNewTerm();
+}
+
+int ReplicationMetrics::getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _electionMetrics.getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd();
+}
+
+void ReplicationMetrics::setElectionCandidateMetrics(Date_t lastElectionDate) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _electionCandidateMetrics.setLastElectionDate(lastElectionDate);
+ _nodeIsCandidateOrPrimary = true;
+}
+
+void ReplicationMetrics::setTargetCatchupOpTime(OpTime opTime) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _electionCandidateMetrics.setTargetCatchupOpTime(opTime);
+}
+
+void ReplicationMetrics::setNumCatchUpOps(int numCatchUpOps) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _electionCandidateMetrics.setNumCatchUpOps(numCatchUpOps);
+}
+
+void ReplicationMetrics::setNewTermStartDate(Date_t newTermStartDate) {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _electionCandidateMetrics.setNewTermStartDate(newTermStartDate);
+}
+
+boost::optional<OpTime> ReplicationMetrics::getTargetCatchupOpTime_forTesting() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _electionCandidateMetrics.getTargetCatchupOpTime();
+}
+
BSONObj ReplicationMetrics::getElectionMetricsBSON() {
stdx::lock_guard<stdx::mutex> lk(_mutex);
return _electionMetrics.toBSON();
}
+BSONObj ReplicationMetrics::getElectionCandidateMetricsBSON() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ if (_nodeIsCandidateOrPrimary) {
+ return _electionCandidateMetrics.toBSON();
+ }
+ return BSONObj();
+}
+
+void ReplicationMetrics::clearElectionCandidateMetrics() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _electionCandidateMetrics.setTargetCatchupOpTime(boost::none);
+ _electionCandidateMetrics.setNumCatchUpOps(boost::none);
+ _electionCandidateMetrics.setNewTermStartDate(boost::none);
+ _nodeIsCandidateOrPrimary = false;
+}
+
class ReplicationMetrics::ElectionMetricsSSS : public ServerStatusSection {
public:
ElectionMetricsSSS() : ServerStatusSection("electionMetrics") {}
diff --git a/src/mongo/db/repl/replication_metrics.h b/src/mongo/db/repl/replication_metrics.h
index 564be3b08dd..a169e8a0da5 100644
--- a/src/mongo/db/repl/replication_metrics.h
+++ b/src/mongo/db/repl/replication_metrics.h
@@ -48,9 +48,13 @@ public:
ReplicationMetrics();
~ReplicationMetrics();
+ // Election metrics
void incrementNumElectionsCalledForReason(TopologyCoordinator::StartElectionReason reason);
void incrementNumElectionsSuccessfulForReason(TopologyCoordinator::StartElectionReason reason);
void incrementNumStepDownsCausedByHigherTerm();
+ void incrementNumCatchUps();
+ void incrementNumCatchUpsConcludedForReason(
+ ReplicationCoordinator::PrimaryCatchUpConclusionReason reason);
int getNumStepUpCmdsCalled_forTesting();
int getNumPriorityTakeoversCalled_forTesting();
@@ -63,8 +67,30 @@ public:
int getNumElectionTimeoutsSuccessful_forTesting();
int getNumFreezeTimeoutsSuccessful_forTesting();
int getNumStepDownsCausedByHigherTerm_forTesting();
+ int getNumCatchUps_forTesting();
+ int getNumCatchUpsSucceeded_forTesting();
+ int getNumCatchUpsAlreadyCaughtUp_forTesting();
+ int getNumCatchUpsSkipped_forTesting();
+ int getNumCatchUpsTimedOut_forTesting();
+ int getNumCatchUpsFailedWithError_forTesting();
+ int getNumCatchUpsFailedWithNewTerm_forTesting();
+ int getNumCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd_forTesting();
+
+ // Election candidate metrics
+
+ // All the election candidate metrics that should be set when a node calls an election are set
+ // in this one function, so that the 'electionCandidateMetrics' section of replSetGetStatus
+ // shows a consistent state.
+ void setElectionCandidateMetrics(Date_t lastElectionDate);
+ void setTargetCatchupOpTime(OpTime opTime);
+ void setNumCatchUpOps(int numCatchUpOps);
+ void setNewTermStartDate(Date_t newTermStartDate);
+
+ boost::optional<OpTime> getTargetCatchupOpTime_forTesting();
BSONObj getElectionMetricsBSON();
+ BSONObj getElectionCandidateMetricsBSON();
+ void clearElectionCandidateMetrics();
private:
class ElectionMetricsSSS;
@@ -73,6 +99,8 @@ private:
ElectionMetrics _electionMetrics;
ElectionCandidateMetrics _electionCandidateMetrics;
ElectionParticipantMetrics _electionParticipantMetrics;
+
+ bool _nodeIsCandidateOrPrimary = false;
};
} // namespace repl
diff --git a/src/mongo/db/repl/replication_metrics.idl b/src/mongo/db/repl/replication_metrics.idl
index a14042cbcf1..c17140b9125 100644
--- a/src/mongo/db/repl/replication_metrics.idl
+++ b/src/mongo/db/repl/replication_metrics.idl
@@ -34,9 +34,11 @@ global:
cpp_namespace: "mongo::repl"
cpp_includes:
- "mongo/db/repl/election_reason_counter_parser.h"
+ - "mongo/db/repl/optime.h"
imports:
- "mongo/idl/basic_types.idl"
+ - "mongo/db/repl/replication_types.idl"
types:
ElectionReasonCounter:
@@ -76,15 +78,67 @@ structs:
description: "Number of times this node stepped down because it saw a higher term"
type: long
default: 0
-
+ numCatchUps:
+ description: "Number of elections that required the primary to catch up because it
+ was behind"
+ type: long
+ default: 0
+ numCatchUpsSucceeded:
+ description: "Number of times primary catchup concluded because the primary caught
+ up to the latest known optime successfully"
+ type: long
+ default: 0
+ numCatchUpsAlreadyCaughtUp:
+ description: "Number of times primary catchup concluded because the primary was
+ already caught up"
+ type: long
+ default: 0
+ numCatchUpsSkipped:
+ description: "Number of times primary catchup concluded because it was skipped"
+ type: long
+ default: 0
+ numCatchUpsTimedOut:
+ description: "Number of times primary catchup concluded because it timed out"
+ type: long
+ default: 0
+ numCatchUpsFailedWithError:
+ description: "Number of times primary catchup concluded because it failed with an
+ error"
+ type: long
+ default: 0
+ numCatchUpsFailedWithNewTerm:
+ description: "Number of times primary catchup concluded because the primary stepped
+ down on seeing a higher term"
+ type: long
+ default: 0
+ numCatchUpsFailedWithReplSetAbortPrimaryCatchUpCmd:
+ description: "Number of times primary catchup concluded because of the
+ replSetAbortPrimaryCatchUp command"
+ type: long
+ default: 0
+
ElectionCandidateMetrics:
description: "Stores metrics that are specific to the last election in which the node was a
candidate"
strict: true
fields:
- priorityAtElection:
- description: "The node's priority at the time of the election"
- type: double
+ lastElectionDate:
+ description: "Time the node called the dry run election, or the actual election if
+ it skipped dry run"
+ type: date
+ targetCatchupOpTime:
+ description: "The node's target opTime for catchup"
+ type: optime
+ optional: true
+ numCatchUpOps:
+ description: "Number of ops applied during catchup when the primary successfully
+ catches up"
+ type: long
+ optional: true
+ newTermStartDate:
+ description: "Time the new term oplog entry was written"
+ type: date
+ optional: true
ElectionParticipantMetrics:
description: "Stores metrics that are specific to the last election in which the node voted"
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index 6463e5954f9..7efe5ee10b4 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -1459,6 +1459,10 @@ StatusWith<OpTime> SyncTail::multiApply(OperationContext* opCtx, MultiApplier::O
}
}
+ // Increment the counter for the number of ops applied during catchup if the node is in catchup
+ // mode.
+ replCoord->incrementNumCatchUpOpsIfCatchingUp(ops.size());
+
// We have now written all database writes and updated the oplog to match.
return ops.back().getOpTime();
}
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index 81056086087..b57480c4836 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -1417,7 +1417,8 @@ const MemberConfig* TopologyCoordinator::_currentPrimaryMember() const {
std::string TopologyCoordinator::_getReplSetStatusString() {
// Construct a ReplSetStatusArgs using default parameters. Missing parameters will not be
// included in the status string.
- ReplSetStatusArgs rsStatusArgs{Date_t::now(), 0U, OpTimeAndWallTime(), BSONObj(), boost::none};
+ ReplSetStatusArgs rsStatusArgs{
+ Date_t::now(), 0U, OpTimeAndWallTime(), BSONObj(), BSONObj(), boost::none};
BSONObjBuilder builder;
Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse");
prepareStatusResponse(rsStatusArgs, &builder, &result);
@@ -1439,6 +1440,7 @@ void TopologyCoordinator::prepareStatusResponse(const ReplSetStatusArgs& rsStatu
const OpTime lastOpDurable = getMyLastDurableOpTime();
const Date_t lastOpDurableWall = getMyLastDurableOpTimeAndWallTime().wallTime;
const BSONObj& initialSyncStatus = rsStatusArgs.initialSyncStatus;
+ const BSONObj& electionCandidateMetrics = rsStatusArgs.electionCandidateMetrics;
const boost::optional<Timestamp>& lastStableRecoveryTimestamp =
rsStatusArgs.lastStableRecoveryTimestamp;
const boost::optional<Timestamp>& lastStableCheckpointTimestampDeprecated =
@@ -1638,6 +1640,10 @@ void TopologyCoordinator::prepareStatusResponse(const ReplSetStatusArgs& rsStatu
response->append("initialSyncStatus", initialSyncStatus);
}
+ if (!electionCandidateMetrics.isEmpty()) {
+ response->append("electionCandidateMetrics", electionCandidateMetrics);
+ }
+
response->append("members", membersOut);
*result = Status::OK();
}
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index 6a6a6af4652..e29453292a4 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -301,6 +301,7 @@ public:
const unsigned selfUptime;
const OpTimeAndWallTime readConcernMajorityOpTime;
const BSONObj initialSyncStatus;
+ const BSONObj electionCandidateMetrics;
// boost::none if the storage engine does not support RTT, or if it does but does not
// persist data to necessitate taking checkpoints. Timestamp::min() if a checkpoint is yet
diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
index 633c0220372..388b8e84dc1 100644
--- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
@@ -1538,6 +1538,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) {
Timestamp lastStableRecoveryTimestamp(2, 2);
Timestamp lastStableCheckpointTimestampDeprecated(2, 2);
BSONObj initialSyncStatus = BSON("failedInitialSyncAttempts" << 1);
+ BSONObj electionCandidateMetrics = BSON("DummyElectionMetrics" << 1);
std::string setName = "mySet";
ReplSetHeartbeatResponse hb;
@@ -1593,6 +1594,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) {
static_cast<unsigned>(durationCount<Seconds>(uptimeSecs)),
{readConcernMajorityOpTime, readConcernMajorityWallTime},
initialSyncStatus,
+ electionCandidateMetrics,
lastStableCheckpointTimestampDeprecated,
lastStableRecoveryTimestamp},
&statusBuilder,
@@ -1703,6 +1705,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) {
ASSERT_EQUALS(2000, rsStatus["heartbeatIntervalMillis"].numberInt());
ASSERT_BSONOBJ_EQ(initialSyncStatus, rsStatus["initialSyncStatus"].Obj());
+ ASSERT_BSONOBJ_EQ(electionCandidateMetrics, rsStatus["electionCandidateMetrics"].Obj());
// Test no lastStableRecoveryTimestamp field.
BSONObjBuilder statusBuilder2;
@@ -1711,7 +1714,8 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) {
curTime,
static_cast<unsigned>(durationCount<Seconds>(uptimeSecs)),
{readConcernMajorityOpTime, readConcernMajorityWallTime},
- initialSyncStatus},
+ initialSyncStatus,
+ BSONObj()},
&statusBuilder2,
&resultStatus);
ASSERT_OK(resultStatus);
@@ -1720,6 +1724,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) {
ASSERT_EQUALS(setName, rsStatus["set"].String());
ASSERT_FALSE(rsStatus.hasField("lastStableRecoveryTimestamp"));
ASSERT_FALSE(rsStatus.hasField("lastStableCheckpointTimestamp"));
+ ASSERT_FALSE(rsStatus.hasField("electionCandidateMetrics"));
}
TEST_F(TopoCoordTest, ReplSetGetStatusIPs) {
diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp
index ccdf18229aa..b11d0106d77 100644
--- a/src/mongo/embedded/replication_coordinator_embedded.cpp
+++ b/src/mongo/embedded/replication_coordinator_embedded.cpp
@@ -355,7 +355,11 @@ Status ReplicationCoordinatorEmbedded::processReplSetInitiate(OperationContext*,
UASSERT_NOT_IMPLEMENTED;
}
-Status ReplicationCoordinatorEmbedded::abortCatchupIfNeeded() {
+Status ReplicationCoordinatorEmbedded::abortCatchupIfNeeded(PrimaryCatchUpConclusionReason reason) {
+ UASSERT_NOT_IMPLEMENTED;
+}
+
+void ReplicationCoordinatorEmbedded::incrementNumCatchUpOpsIfCatchingUp(int numOps) {
UASSERT_NOT_IMPLEMENTED;
}
diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h
index c68a7e8b7af..1246adf7e93 100644
--- a/src/mongo/embedded/replication_coordinator_embedded.h
+++ b/src/mongo/embedded/replication_coordinator_embedded.h
@@ -249,7 +249,9 @@ public:
Status stepUpIfEligible(bool skipDryRun) override;
- Status abortCatchupIfNeeded() override;
+ Status abortCatchupIfNeeded(PrimaryCatchUpConclusionReason reason) override;
+
+ void incrementNumCatchUpOpsIfCatchingUp(int numOps) override;
void signalDropPendingCollectionsRemovedFromStorage() final;