author     Suganthi Mani <suganthi.mani@mongodb.com>   2019-02-07 12:30:37 -0500
committer  Suganthi Mani <suganthi.mani@mongodb.com>   2019-02-13 23:49:50 -0500
commit     6089c4c1d8f166b6b61cec980672779b7cedc303 (patch)
tree       efcf098e1e798684340731ba294e294a70ec3677 /src/mongo
parent     83336cb56b269195110253918d226cbba4377a03 (diff)
download   mongo-6089c4c1d8f166b6b61cec980672779b7cedc303.tar.gz
SERVER-38696 Add additional metrics and logging for new step down sequence.
Diffstat (limited to 'src/mongo')
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp            | 45
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.h              | 20
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp  |  1
-rw-r--r--  src/mongo/db/service_entry_point_common.cpp                   | 53
4 files changed, 107 insertions, 12 deletions
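
The commit wires up four new serverStatus counters (repl.stepDown.userOperationsKilled, repl.stepDown.userOperationsRunning, repl.network.notMasterLegacyUnacknowledgedWrites, repl.network.notMasterUnacknowledgedWrites), a waitAfterReadCommandFinishesExecution failpoint, and a summary log line emitted once a step down completes. As orientation before the hunks, here is a minimal sketch of the Counter64/ServerStatusMetricField registration pattern that every new metric below follows; the header paths and namespace placement are assumptions about the tree layout at this revision, so the snippet is only meant to compile inside the server tree.

    // Sketch of the metric-registration pattern this commit relies on (assumed headers).
    #include "mongo/base/counter.h"                      // Counter64
    #include "mongo/db/commands/server_status_metric.h"  // ServerStatusMetricField

    namespace mongo {

    // Incrementing a Counter64 is an atomic operation, so the kill-op loop and the
    // command dispatch path can bump these counters without taking any extra locks.
    Counter64 userOpsKilled;
    ServerStatusMetricField<Counter64> displayUserOpsKilled("repl.stepDown.userOperationsKilled",
                                                            &userOpsKilled);

    }  // namespace mongo

Once registered this way, the value surfaces under the "metrics" section of db.serverStatus() output, i.e. metrics.repl.stepDown.userOperationsKilled (path inferred from the metric name).
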
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index f128d9c42e6..b1fdd0ecdad 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -99,6 +99,16 @@ MONGO_FAIL_POINT_DEFINE(stepdownHangBeforePerformingPostMemberStateUpdateActions
MONGO_FAIL_POINT_DEFINE(transitionToPrimaryHangBeforeTakingGlobalExclusiveLock);
MONGO_FAIL_POINT_DEFINE(holdStableTimestampAtSpecificTimestamp);
+// Tracks the number of operations killed on step down.
+Counter64 userOpsKilled;
+ServerStatusMetricField<Counter64> displayuserOpsKilled("repl.stepDown.userOperationsKilled",
+ &userOpsKilled);
+
+// Tracks the number of operations left running on step down.
+Counter64 userOpsRunning;
+ServerStatusMetricField<Counter64> displayUserOpsRunning("repl.stepDown.userOperationsRunning",
+ &userOpsRunning);
+
using CallbackArgs = executor::TaskExecutor::CallbackArgs;
using CallbackFn = executor::TaskExecutor::CallbackFn;
using CallbackHandle = executor::TaskExecutor::CallbackHandle;
@@ -1040,6 +1050,10 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx,
invariant(status);
}
+ // Reset the counters on step up.
+ userOpsKilled.decrement(userOpsKilled.get());
+ userOpsRunning.decrement(userOpsRunning.get());
+
// Must calculate the commit level again because firstOpTimeOfMyTerm wasn't set when we logged
// our election in onTransitionToPrimary(), above.
_updateLastCommittedOpTime(lk);
@@ -1751,8 +1765,18 @@ void ReplicationCoordinatorImpl::waitForStepDownAttempt_forTest() {
}
}
+void ReplicationCoordinatorImpl::_updateAndLogStatsOnStepDown(const KillOpContainer* koc) const {
+ userOpsRunning.increment(koc->getUserOpsRunning());
+
+ BSONObjBuilder bob;
+ bob.appendNumber("userOpsKilled", userOpsKilled.get());
+ bob.appendNumber("userOpsRunning", userOpsRunning.get());
+
+ log() << "Successfully stepped down from primary, stats: " << bob.obj();
+}
+
void ReplicationCoordinatorImpl::_killUserOperationsOnStepDown(
- const OperationContext* stepDownOpCtx) {
+ const OperationContext* stepDownOpCtx, KillOpContainer* koc) {
ServiceContext* serviceCtx = stepDownOpCtx->getServiceContext();
invariant(serviceCtx);
@@ -1766,12 +1790,15 @@ void ReplicationCoordinatorImpl::_killUserOperationsOnStepDown(
OperationContext* toKill = client->getOperationContext();
// Don't kill the stepdown thread.
- if (toKill && toKill->getOpID() != stepDownOpCtx->getOpID()) {
+ if (toKill && !toKill->isKillPending() && toKill->getOpID() != stepDownOpCtx->getOpID()) {
const GlobalLockAcquisitionTracker& globalLockTracker =
GlobalLockAcquisitionTracker::get(toKill);
if (globalLockTracker.getGlobalWriteLocked() ||
globalLockTracker.getGlobalSharedLockTaken()) {
serviceCtx->killOperation(lk, toKill, ErrorCodes::InterruptedDueToStepDown);
+ userOpsKilled.increment();
+ } else {
+ koc->incrUserOpsRunningBy();
}
}
}
@@ -1792,7 +1819,10 @@ void ReplicationCoordinatorImpl::KillOpContainer::killOpThreadFn() {
OperationContext* opCtx = uniqueOpCtx.get();
while (true) {
- _replCord->_killUserOperationsOnStepDown(_stepDownOpCtx);
+ // Reset the value before killing user operations, as we only want to track the number
+ // of operations still running after step down.
+ _userOpsRunning = 0;
+ _replCord->_killUserOperationsOnStepDown(_stepDownOpCtx, this);
// Destroy all stashed transaction resources, in order to release locks.
SessionKiller::Matcher matcherAllSessions(
@@ -1831,6 +1861,14 @@ void ReplicationCoordinatorImpl::KillOpContainer::stopAndWaitForKillOpThread() {
_killOpThread.reset();
}
+size_t ReplicationCoordinatorImpl::KillOpContainer::getUserOpsRunning() const {
+ return _userOpsRunning;
+}
+
+void ReplicationCoordinatorImpl::KillOpContainer::incrUserOpsRunningBy(size_t val) {
+ _userOpsRunning += val;
+}
+
void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
const bool force,
const Milliseconds& waitTime,
@@ -1979,6 +2017,7 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
onExitGuard.dismiss();
updateMemberState();
+ _updateAndLogStatsOnStepDown(&koc);
// Schedule work to (potentially) step back up once the stepdown period has ended.
_scheduleWorkAt(stepDownUntil, [=](const executor::TaskExecutor::CallbackArgs& cbData) {
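
Taken together, the replication_coordinator_impl.cpp hunks add a small piece of accounting: each pass of killOpThreadFn() resets the per-stepdown running count, kills user operations that hold the global lock in write or shared (non-IS) mode while bumping userOpsKilled, counts the operations it deliberately leaves alone via incrUserOpsRunningBy(), and _updateAndLogStatsOnStepDown() later folds that count into userOpsRunning and logs both totals. Below is a standalone illustration of that accounting, using a hypothetical FakeOp type in place of MongoDB's Client/OperationContext machinery; it is a model of the logic, not server code.

    // Simplified, self-contained model of the step-down accounting above.
    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct FakeOp {
        bool killPending;                   // already marked killed?
        bool holdsGlobalWriteOrSharedLock;  // global lock held in write or non-IS shared mode
    };

    int main() {
        std::vector<FakeOp> ops = {{false, true}, {false, false}, {true, true}};

        std::uint64_t userOpsKilled = 0;   // cumulative metric, reset again on step up
        std::uint64_t userOpsRunning = 0;  // recomputed on every pass of the kill-op loop

        userOpsRunning = 0;  // mirrors "_userOpsRunning = 0" at the top of each pass
        for (auto& op : ops) {
            if (op.killPending)
                continue;  // already being killed; the new isKillPending() check skips it
            if (op.holdsGlobalWriteOrSharedLock) {
                op.killPending = true;  // killOperation(..., InterruptedDueToStepDown)
                ++userOpsKilled;
            } else {
                ++userOpsRunning;  // koc->incrUserOpsRunningBy()
            }
        }

        // _updateAndLogStatsOnStepDown(): report both values once the step down is done.
        std::cout << "Successfully stepped down from primary, stats: { userOpsKilled: "
                  << userOpsKilled << ", userOpsRunning: " << userOpsRunning << " }\n";
        return 0;
    }

Because signalDrainComplete() zeroes both counters on step up, the serverStatus values describe step-down activity since the node last became primary rather than a lifetime total.
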
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 5632c829867..883d41555e5 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -491,6 +491,16 @@ private:
*/
void stopAndWaitForKillOpThread();
+ /*
+ * Returns _userOpsRunning value.
+ */
+ size_t getUserOpsRunning() const;
+
+ /*
+ * Increments _userOpsRunning by val.
+ */
+ void incrUserOpsRunningBy(size_t val = 1);
+
private:
ReplicationCoordinatorImpl* const _replCord; // not owned.
OperationContext* const _stepDownOpCtx; // not owned.
@@ -502,6 +512,8 @@ private:
stdx::condition_variable _stopKillingOps;
// Once this is set to true, the killOpThreadFn method will terminate.
bool _killSignaled = false;
+ // Tracks number of operations left running on step down.
+ size_t _userOpsRunning = 0;
};
// Abstract struct that holds information about clients waiting for replication.
@@ -987,9 +999,15 @@ private:
executor::TaskExecutor::EventHandle _stepDownStart();
/**
* Updates the "repl.stepDown.userOperationsRunning" counter and logs the number of operations
* killed and left running on step down.
+ */
+ void _updateAndLogStatsOnStepDown(const KillOpContainer* koc) const;
+
+ /**
* kill all user operations that have taken a global lock except in IS mode.
*/
- void _killUserOperationsOnStepDown(const OperationContext* stepDownOpCtx);
+ void _killUserOperationsOnStepDown(const OperationContext* stepDownOpCtx, KillOpContainer* koc);
/**
* Completes a step-down of the current node. Must be run with a global
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 6cdbcfda8c1..0d082fe65da 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -414,6 +414,7 @@ void ReplicationCoordinatorImpl::_stepDownFinish(
}
lk.unlock();
_performPostMemberStateUpdateAction(action);
+ _updateAndLogStatsOnStepDown(&koc);
_replExecutor->signalEvent(finishedEvent);
}
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index 64edc5ef272..a02c831bfb0 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -45,6 +45,7 @@
#include "mongo/db/commands/test_commands_enabled.h"
#include "mongo/db/concurrency/global_lock_acquisition_tracker.h"
#include "mongo/db/curop.h"
+#include "mongo/db/curop_failpoint_helpers.h"
#include "mongo/db/curop_metrics.h"
#include "mongo/db/cursor_manager.h"
#include "mongo/db/dbdirectclient.h"
@@ -98,6 +99,19 @@ namespace mongo {
MONGO_FAIL_POINT_DEFINE(rsStopGetMore);
MONGO_FAIL_POINT_DEFINE(respondWithNotPrimaryInCommandDispatch);
MONGO_FAIL_POINT_DEFINE(skipCheckingForNotMasterInCommandDispatch);
+MONGO_FAIL_POINT_DEFINE(waitAfterReadCommandFinishesExecution);
+
+// Tracks the number of times a legacy unacknowledged write failed with a
+// not-master error, resulting in a network disconnection.
+Counter64 notMasterLegacyUnackWrites;
+ServerStatusMetricField<Counter64> displayNotMasterLegacyUnackWrites(
+ "repl.network.notMasterLegacyUnacknowledgedWrites", &notMasterLegacyUnackWrites);
+
+// Tracks the number of times an unacknowledged write failed with a not-master error,
+// resulting in a network disconnection.
+Counter64 notMasterUnackWrites;
+ServerStatusMetricField<Counter64> displayNotMasterUnackWrites(
+ "repl.network.notMasterUnacknowledgedWrites", &notMasterUnackWrites);
namespace {
using logger::LogComponent;
@@ -450,6 +464,16 @@ bool runCommandImpl(OperationContext* opCtx,
opCtx, invocation, txnParticipant, sessionOptions, replyBuilder);
} else {
invocation->run(opCtx, replyBuilder);
+ MONGO_FAIL_POINT_BLOCK(waitAfterReadCommandFinishesExecution, options) {
+ const BSONObj& data = options.getData();
+ auto db = data["db"].str();
+ if (db.empty() || request.getDatabase() == db) {
+ CurOpFailpointHelpers::waitWhileFailPointEnabled(
+ &waitAfterReadCommandFinishesExecution,
+ opCtx,
+ "waitAfterReadCommandFinishesExecution");
+ }
+ }
}
} else {
auto wcResult = uassertStatusOK(extractWriteConcern(opCtx, request.body));
@@ -863,8 +887,8 @@ DbResponse receivedCommands(OperationContext* opCtx,
const Message& message,
const ServiceEntryPointCommon::Hooks& behaviors) {
auto replyBuilder = rpc::makeReplyBuilder(rpc::protocolForMessage(message));
+ OpMsgRequest request;
[&] {
- OpMsgRequest request;
try { // Parse.
request = rpc::opMsgRequestFromAnyProtocol(message);
} catch (const DBException& ex) {
@@ -934,10 +958,16 @@ DbResponse receivedCommands(OperationContext* opCtx,
if (OpMsg::isFlagSet(message, OpMsg::kMoreToCome)) {
// Close the connection to get client to go through server selection again.
- uassert(ErrorCodes::NotMaster,
- "Not-master error during fire-and-forget command processing",
- !LastError::get(opCtx->getClient()).hadNotMasterError());
-
+ if (LastError::get(opCtx->getClient()).hadNotMasterError()) {
+ notMasterUnackWrites.increment();
+ uasserted(ErrorCodes::NotMaster,
+ str::stream() << "Not-master error while processing '"
+ << request.getCommandName()
+ << "' operation on '"
+ << request.getDatabase()
+ << "' database via "
+ << "fire-and-forget command execution.");
+ }
return {}; // Don't reply.
}
@@ -1253,9 +1283,16 @@ DbResponse ServiceEntryPointCommon::handleRequest(OperationContext* opCtx,
// A NotMaster error can be set either within receivedInsert/receivedUpdate/receivedDelete
// or within the AssertionException handler above. Either way, we want to throw an
// exception here, which will cause the client to be disconnected.
- uassert(ErrorCodes::NotMaster,
- "Not-master error during legacy fire-and-forget command processing",
- !LastError::get(opCtx->getClient()).hadNotMasterError());
+ if (LastError::get(opCtx->getClient()).hadNotMasterError()) {
+ notMasterLegacyUnackWrites.increment();
+ uasserted(ErrorCodes::NotMaster,
+ str::stream() << "Not-master error while processing '"
+ << networkOpToString(op)
+ << "' operation on '"
+ << nsString
+ << "' namespace via legacy "
+ << "fire-and-forget command execution.");
+ }
}
// Mark the op as complete, and log it if appropriate. Returns a boolean indicating whether
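
Finally, both unacknowledged-write paths in service_entry_point_common.cpp (the OP_MSG kMoreToCome branch and the legacy opcode branch) now follow the same shape: record the event in a metric, then raise NotMaster with enough context (command or opcode, database or namespace) to make the resulting connection close diagnosable. A reduced, self-contained sketch of that shape, with standard-library stand-ins for Counter64, LastError, uasserted, and ErrorCodes::NotMaster:

    // Sketch of the "count, then fail loudly" pattern used by both paths above
    // (hypothetical names; not MongoDB code).
    #include <atomic>
    #include <cstdint>
    #include <iostream>
    #include <sstream>
    #include <stdexcept>
    #include <string>

    std::atomic<std::uint64_t> notMasterUnackWrites{0};  // stands in for the Counter64 metric

    void failUnacknowledgedWriteIfNotMaster(bool hadNotMasterError,
                                            const std::string& commandName,
                                            const std::string& dbName) {
        if (!hadNotMasterError)
            return;  // nothing to report; the caller still sends no reply
        // Count first so the metric records the event even though we throw next.
        notMasterUnackWrites.fetch_add(1);
        std::ostringstream msg;
        msg << "Not-master error while processing '" << commandName << "' operation on '"
            << dbName << "' database via fire-and-forget command execution.";
        // Throwing is what ultimately closes the connection and sends the client
        // back through server selection.
        throw std::runtime_error(msg.str());
    }

    int main() {
        try {
            failUnacknowledgedWriteIfNotMaster(true, "insert", "test");
        } catch (const std::exception& ex) {
            std::cout << ex.what() << " (notMasterUnackWrites=" << notMasterUnackWrites << ")\n";
        }
        return 0;
    }
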