author    Samyukta Lanka <samy.lanka@mongodb.com>  2020-01-13 23:27:43 +0000
committer evergreen <evergreen@mongodb.com>        2020-01-13 23:27:43 +0000
commit    515b5d3510124d307e6db8c85b72c8f680ed37e1 (patch)
tree      7a5a928c3a70d9b1bb3f0a0a0855c44c26909751 /src/mongo
parent    cc283da7cdac667c1941b40d1fb155dbd15afe20 (diff)
download  mongo-515b5d3510124d307e6db8c85b72c8f680ed37e1.tar.gz
SERVER-42825 Log and track metrics.repl.stateTransition counters after stopped killing user operation
(cherry picked from commit b3b494a72f0e19d7556bee627da7ae9b79e26a03)

SERVER-45497 Add tests that will be fixed by future backport to backports_required_for_multiversion_tests.yml

(cherry picked from commit 5fcedbdd44f19fdbaeb600b470b4166fbb2c1e97)
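For reference, this change moves the counters from metrics.repl.stepDown to metrics.repl.stateTransition in serverStatus and adds the name of the last transition. A sketch of the resulting serverStatus excerpt, using the field names registered in this diff with hypothetical values:

    metrics.repl.stateTransition.lastStateTransition     "stepDown"
    metrics.repl.stateTransition.userOperationsKilled    3
    metrics.repl.stateTransition.userOperationsRunning   42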
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/repl/replication_coordinator.h15
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp86
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.h38
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp8
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.cpp7
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.h5
-rw-r--r--src/mongo/db/repl/replication_coordinator_noop.cpp7
-rw-r--r--src/mongo/db/repl/replication_coordinator_noop.h5
-rw-r--r--src/mongo/db/repl/rollback_impl.cpp15
-rw-r--r--src/mongo/db/repl/rollback_impl.h6
-rw-r--r--src/mongo/db/service_entry_point_common.cpp31
-rw-r--r--src/mongo/embedded/replication_coordinator_embedded.cpp7
-rw-r--r--src/mongo/embedded/replication_coordinator_embedded.h5
13 files changed, 179 insertions, 56 deletions
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index 433249a086f..1cdbba25b7a 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -929,6 +929,21 @@ public:
*/
inline static constexpr StringData newPrimaryMsg = "new primary"_sd;
+ /*
+ * Specifies the state transitions that kill user operations. Used for tracking state transition
+ * metrics.
+ */
+ enum class OpsKillingStateTransitionEnum { kStepUp, kStepDown, kRollback };
+
+ /**
+ * Updates metrics around user ops when a state transition that kills user ops and select
+ * internal operations occurs (i.e. step up, step down, or rollback). Also logs the metrics.
+ */
+ virtual void updateAndLogStateTransitionMetrics(
+ const ReplicationCoordinator::OpsKillingStateTransitionEnum stateTransition,
+ const size_t numOpsKilled,
+ const size_t numOpsRunning) const = 0;
+
protected:
ReplicationCoordinator();
};
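As a usage sketch, a caller that has tallied killed and still-running operations reports them through this interface (mirroring the rollback call site later in this diff; numKilled and numRunning are hypothetical locals):

    // Hypothetical caller; numKilled/numRunning tallied by the transition code.
    replCoord->updateAndLogStateTransitionMetrics(
        ReplicationCoordinator::OpsKillingStateTransitionEnum::kRollback,
        numKilled,
        numRunning);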
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 40d95badd5c..f10e828fdec 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -102,15 +102,20 @@ MONGO_FAIL_POINT_DEFINE(stepdownHangBeforeRSTLEnqueue);
// Fail setMaintenanceMode with ErrorCodes::NotSecondary to simulate a concurrent election.
MONGO_FAIL_POINT_DEFINE(setMaintenanceModeFailsWithNotSecondary);
-// Tracks the number of operations killed on step down.
+// Tracks the last state transition performed in this replica set.
+std::string lastStateTransition;
+ServerStatusMetricField<std::string> displayLastStateTransition(
+ "repl.stateTransition.lastStateTransition", &lastStateTransition);
+
+// Tracks the number of operations killed on state transition.
Counter64 userOpsKilled;
-ServerStatusMetricField<Counter64> displayuserOpsKilled("repl.stepDown.userOperationsKilled",
+ServerStatusMetricField<Counter64> displayUserOpsKilled("repl.stateTransition.userOperationsKilled",
&userOpsKilled);
-// Tracks the number of operations left running on step down.
+// Tracks the number of operations left running on state transition.
Counter64 userOpsRunning;
-ServerStatusMetricField<Counter64> displayUserOpsRunning("repl.stepDown.userOperationsRunning",
- &userOpsRunning);
+ServerStatusMetricField<Counter64> displayUserOpsRunning(
+ "repl.stateTransition.userOperationsRunning", &userOpsRunning);
using CallbackArgs = executor::TaskExecutor::CallbackArgs;
using CallbackFn = executor::TaskExecutor::CallbackFn;
@@ -1024,7 +1029,8 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx,
// internal operations. Although secondaries cannot accept writes, a step up can kill writes
// that were blocked behind the RSTL lock held by a step down attempt. These writes will be
// killed with a retryable error code during step up.
- AutoGetRstlForStepUpStepDown arsu(this, opCtx);
+ AutoGetRstlForStepUpStepDown arsu(
+ this, opCtx, ReplicationCoordinator::OpsKillingStateTransitionEnum::kStepUp);
lk.lock();
// Exit drain mode only if we're actually in draining mode, the apply buffer is empty in the
@@ -1052,10 +1058,6 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx,
invariant(status);
}
- // Reset the counters on step up.
- userOpsKilled.decrement(userOpsKilled.get());
- userOpsRunning.decrement(userOpsRunning.get());
-
// Must calculate the commit level again because firstOpTimeOfMyTerm wasn't set when we logged
// our election in onTransitionToPrimary(), above.
_updateLastCommittedOpTimeAndWallTime(lk);
@@ -1819,15 +1821,38 @@ void ReplicationCoordinatorImpl::waitForStepDownAttempt_forTest() {
}
}
-void ReplicationCoordinatorImpl::_updateAndLogStatsOnStepDown(
- const AutoGetRstlForStepUpStepDown* arsd) const {
- userOpsRunning.increment(arsd->getUserOpsRunning());
+void ReplicationCoordinatorImpl::updateAndLogStateTransitionMetrics(
+ const ReplicationCoordinator::OpsKillingStateTransitionEnum stateTransition,
+ const size_t numOpsKilled,
+ const size_t numOpsRunning) const {
+
+ // Clear the current metrics before setting.
+ userOpsKilled.decrement(userOpsKilled.get());
+ userOpsRunning.decrement(userOpsRunning.get());
+
+ switch (stateTransition) {
+ case ReplicationCoordinator::OpsKillingStateTransitionEnum::kStepUp:
+ lastStateTransition = "stepUp";
+ break;
+ case ReplicationCoordinator::OpsKillingStateTransitionEnum::kStepDown:
+ lastStateTransition = "stepDown";
+ break;
+ case ReplicationCoordinator::OpsKillingStateTransitionEnum::kRollback:
+ lastStateTransition = "rollback";
+ break;
+ default:
+ MONGO_UNREACHABLE;
+ }
+
+ userOpsKilled.increment(numOpsKilled);
+ userOpsRunning.increment(numOpsRunning);
BSONObjBuilder bob;
+ bob.append("lastStateTransition", lastStateTransition);
bob.appendNumber("userOpsKilled", userOpsKilled.get());
bob.appendNumber("userOpsRunning", userOpsRunning.get());
- log() << "Stepping down from primary, stats: " << bob.obj();
+ log() << "State transition ops metrics: " << bob.obj();
}
void ReplicationCoordinatorImpl::_killConflictingOpsOnStepUpAndStepDown(
@@ -1850,19 +1875,25 @@ void ReplicationCoordinatorImpl::_killConflictingOpsOnStepUpAndStepDown(
if (locker->wasGlobalLockTakenInModeConflictingWithWrites() ||
PrepareConflictTracker::get(toKill).isWaitingOnPrepareConflict()) {
serviceCtx->killOperation(lk, toKill, reason);
- userOpsKilled.increment();
+ arsc->incrementUserOpsKilled();
} else {
- arsc->incrUserOpsRunningBy();
+ arsc->incrementUserOpsRunning();
}
}
}
}
ReplicationCoordinatorImpl::AutoGetRstlForStepUpStepDown::AutoGetRstlForStepUpStepDown(
- ReplicationCoordinatorImpl* repl, OperationContext* opCtx, Date_t deadline)
- : _replCord(repl), _opCtx(opCtx) {
+ ReplicationCoordinatorImpl* repl,
+ OperationContext* opCtx,
+ const ReplicationCoordinator::OpsKillingStateTransitionEnum stateTransition,
+ Date_t deadline)
+ : _replCord(repl), _opCtx(opCtx), _stateTransition(stateTransition) {
invariant(_replCord && _opCtx);
+ // The state transition should never be rollback within this class.
+ invariant(_stateTransition != ReplicationCoordinator::OpsKillingStateTransitionEnum::kRollback);
+
// Enqueues RSTL in X mode.
_rstlLock.emplace(_opCtx, MODE_X, ReplicationStateTransitionLockGuard::EnqueueOnly());
@@ -1912,6 +1943,8 @@ void ReplicationCoordinatorImpl::AutoGetRstlForStepUpStepDown::_killOpThreadFn()
if (_stopKillingOps.wait_for(
lock, Milliseconds(10).toSystemDuration(), [this] { return _killSignaled; })) {
log() << "Stopped killing user operations";
+ _replCord->updateAndLogStateTransitionMetrics(
+ _stateTransition, getUserOpsKilled(), getUserOpsRunning());
_killSignaled = false;
return;
}
@@ -1932,11 +1965,19 @@ void ReplicationCoordinatorImpl::AutoGetRstlForStepUpStepDown::_stopAndWaitForKi
_killOpThread.reset();
}
+size_t ReplicationCoordinatorImpl::AutoGetRstlForStepUpStepDown::getUserOpsKilled() const {
+ return _userOpsKilled;
+}
+
+void ReplicationCoordinatorImpl::AutoGetRstlForStepUpStepDown::incrementUserOpsKilled(size_t val) {
+ _userOpsKilled += val;
+}
+
size_t ReplicationCoordinatorImpl::AutoGetRstlForStepUpStepDown::getUserOpsRunning() const {
return _userOpsRunning;
}
-void ReplicationCoordinatorImpl::AutoGetRstlForStepUpStepDown::incrUserOpsRunningBy(size_t val) {
+void ReplicationCoordinatorImpl::AutoGetRstlForStepUpStepDown::incrementUserOpsRunning(size_t val) {
_userOpsRunning += val;
}
@@ -1982,7 +2023,8 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
// fail if it does not acquire the lock immediately. In such a scenario, we use the
// stepDownUntil deadline instead.
auto deadline = force ? stepDownUntil : waitUntil;
- AutoGetRstlForStepUpStepDown arsd(this, opCtx, deadline);
+ AutoGetRstlForStepUpStepDown arsd(
+ this, opCtx, ReplicationCoordinator::OpsKillingStateTransitionEnum::kStepDown, deadline);
stdx::unique_lock<Latch> lk(_mutex);
@@ -2099,7 +2141,6 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
yieldLocksForPreparedTransactions(opCtx);
lk.lock();
- _updateAndLogStatsOnStepDown(&arsd);
// Clear the node's election candidate metrics since it is no longer primary.
ReplicationMetrics::get(opCtx).clearElectionCandidateMetrics();
@@ -2672,7 +2713,7 @@ void ReplicationCoordinatorImpl::_finishReplSetReconfig(OperationContext* opCtx,
// Primary node won't be electable or removed after the configuration change.
// So, finish the reconfig under RSTL, so that the step down occurs safely.
- arsd.emplace(this, opCtx);
+ arsd.emplace(this, opCtx, ReplicationCoordinator::OpsKillingStateTransitionEnum::kStepDown);
lk.lock();
if (_topCoord->isSteppingDownUnconditionally()) {
@@ -2686,7 +2727,6 @@ void ReplicationCoordinatorImpl::_finishReplSetReconfig(OperationContext* opCtx,
yieldLocksForPreparedTransactions(opCtx);
lk.lock();
- _updateAndLogStatsOnStepDown(&arsd.get());
// Clear the node's election candidate metrics since it is no longer primary.
ReplicationMetrics::get(opCtx).clearElectionCandidateMetrics();
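Given the builder fields above, the new shared log message would render along these lines (values hypothetical):

    State transition ops metrics: { lastStateTransition: "stepDown", userOpsKilled: 3, userOpsRunning: 42 }

One message now covers step up, step down, and rollback, replacing the step-down-only "Stepping down from primary, stats:" line.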
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index c42b47cf73a..4807735df26 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -335,6 +335,11 @@ public:
virtual void attemptToAdvanceStableTimestamp() override;
+ virtual void updateAndLogStateTransitionMetrics(
+ const ReplicationCoordinator::OpsKillingStateTransitionEnum stateTransition,
+ const size_t numOpsKilled,
+ const size_t numOpsRunning) const override;
+
// ================== Test support API ===================
/**
@@ -487,9 +492,11 @@ private:
// operations (user/system) and aborts stashed running transactions.
class AutoGetRstlForStepUpStepDown {
public:
- AutoGetRstlForStepUpStepDown(ReplicationCoordinatorImpl* repl,
- OperationContext* opCtx,
- Date_t deadline = Date_t::max());
+ AutoGetRstlForStepUpStepDown(
+ ReplicationCoordinatorImpl* repl,
+ OperationContext* opCtx,
+ ReplicationCoordinator::OpsKillingStateTransitionEnum stateTransition,
+ Date_t deadline = Date_t::max());
// Disallows copying.
AutoGetRstlForStepUpStepDown(const AutoGetRstlForStepUpStepDown&) = delete;
@@ -506,6 +513,16 @@ private:
void rstlReacquire();
/*
+ * Returns _userOpsKilled value.
+ */
+ size_t getUserOpsKilled() const;
+
+ /*
+ * Increments _userOpsKilled by val.
+ */
+ void incrementUserOpsKilled(size_t val = 1);
+
+ /*
* Returns _userOpsRunning value.
*/
size_t getUserOpsRunning() const;
@@ -513,7 +530,7 @@ private:
/*
* Increments _userOpsRunning by val.
*/
- void incrUserOpsRunningBy(size_t val = 1);
+ void incrementUserOpsRunning(size_t val = 1);
/*
* Returns the step up/step down opCtx.
@@ -566,7 +583,9 @@ private:
boost::optional<ReplicationStateTransitionLockGuard> _rstlLock;
// Thread that will run killOpThreadFn().
std::unique_ptr<stdx::thread> _killOpThread;
- // Tracks number of operations left running on step down.
+ // Tracks number of operations killed on step up / step down.
+ size_t _userOpsKilled = 0;
+ // Tracks number of operations left running on step up / step down.
size_t _userOpsRunning = 0;
// Protects killSignaled and stopKillingOps cond. variable.
Mutex _mutex = MONGO_MAKE_LATCH("AutoGetRstlForStepUpStepDown::_mutex");
@@ -574,6 +593,9 @@ private:
stdx::condition_variable _stopKillingOps;
// Once this is set to true, the killOpThreadFn method will terminate.
bool _killSignaled = false;
+ // The state transition that is in progress. Should never be set to rollback within this
+ // class.
+ ReplicationCoordinator::OpsKillingStateTransitionEnum _stateTransition;
};
// Abstract struct that holds information about clients waiting for replication.
@@ -1092,12 +1114,6 @@ private:
executor::TaskExecutor::EventHandle _stepDownStart();
/**
- * Update the "repl.stepDown.userOperationsRunning" counter and log number of operations
- * killed and left running on step down.
- */
- void _updateAndLogStatsOnStepDown(const AutoGetRstlForStepUpStepDown* arsd) const;
-
- /**
* kill all conflicting operations that are blocked either on prepare conflict or have taken
* global lock not in MODE_IS. The conflicting operations can be either user or system
* operations marked as killable.
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 5b2a7730e04..a1afae9f3ae 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -405,7 +405,8 @@ void ReplicationCoordinatorImpl::_stepDownFinish(
// kill all write operations which are no longer safe to run on step down. Also, operations that
// have taken global lock in S mode and operations blocked on prepare conflict will be killed to
// avoid 3-way deadlock between read, prepared transaction and step down thread.
- AutoGetRstlForStepUpStepDown arsd(this, opCtx.get());
+ AutoGetRstlForStepUpStepDown arsd(
+ this, opCtx.get(), ReplicationCoordinator::OpsKillingStateTransitionEnum::kStepDown);
stdx::unique_lock<Latch> lk(_mutex);
// This node has already stepped down due to reconfig. So, signal anyone who is waiting on the
@@ -422,7 +423,6 @@ void ReplicationCoordinatorImpl::_stepDownFinish(
yieldLocksForPreparedTransactions(opCtx.get());
lk.lock();
- _updateAndLogStatsOnStepDown(&arsd);
// Clear the node's election candidate metrics since it is no longer primary.
ReplicationMetrics::get(opCtx.get()).clearElectionCandidateMetrics();
@@ -635,7 +635,8 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
// Primary node will be either unelectable or removed after the configuration change.
// So, finish the reconfig under RSTL, so that the step down occurs safely.
- arsd.emplace(this, opCtx.get());
+ arsd.emplace(
+ this, opCtx.get(), ReplicationCoordinator::OpsKillingStateTransitionEnum::kStepDown);
lk.lock();
if (_topCoord->isSteppingDownUnconditionally()) {
@@ -649,7 +650,6 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
yieldLocksForPreparedTransactions(opCtx.get());
lk.lock();
- _updateAndLogStatsOnStepDown(&arsd.get());
// Clear the node's election candidate metrics since it is no longer primary.
ReplicationMetrics::get(opCtx.get()).clearElectionCandidateMetrics();
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index be960f2b5b8..8ce36495bd7 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -558,5 +558,12 @@ void ReplicationCoordinatorMock::attemptToAdvanceStableTimestamp() {
return;
}
+void ReplicationCoordinatorMock::updateAndLogStateTransitionMetrics(
+ const ReplicationCoordinator::OpsKillingStateTransitionEnum stateTransition,
+ const size_t numOpsKilled,
+ const size_t numOpsRunning) const {
+ return;
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 8ea9a9ddd8e..21f6c7a89db 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -316,6 +316,11 @@ public:
virtual void attemptToAdvanceStableTimestamp() override;
+ virtual void updateAndLogStateTransitionMetrics(
+ const ReplicationCoordinator::OpsKillingStateTransitionEnum stateTransition,
+ const size_t numOpsKilled,
+ const size_t numOpsRunning) const override;
+
virtual void setCanAcceptNonLocalWrites(bool canAcceptNonLocalWrites);
private:
diff --git a/src/mongo/db/repl/replication_coordinator_noop.cpp b/src/mongo/db/repl/replication_coordinator_noop.cpp
index a506da67996..0478a4a7418 100644
--- a/src/mongo/db/repl/replication_coordinator_noop.cpp
+++ b/src/mongo/db/repl/replication_coordinator_noop.cpp
@@ -466,5 +466,12 @@ void ReplicationCoordinatorNoOp::attemptToAdvanceStableTimestamp() {
MONGO_UNREACHABLE;
}
+void ReplicationCoordinatorNoOp::updateAndLogStateTransitionMetrics(
+ const ReplicationCoordinator::OpsKillingStateTransitionEnum stateTransition,
+ const size_t numOpsKilled,
+ const size_t numOpsRunning) const {
+ MONGO_UNREACHABLE;
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_noop.h b/src/mongo/db/repl/replication_coordinator_noop.h
index e6b1b3ecd43..55c72793ee6 100644
--- a/src/mongo/db/repl/replication_coordinator_noop.h
+++ b/src/mongo/db/repl/replication_coordinator_noop.h
@@ -255,6 +255,11 @@ public:
void attemptToAdvanceStableTimestamp() final;
+ void updateAndLogStateTransitionMetrics(
+ const ReplicationCoordinator::OpsKillingStateTransitionEnum stateTransition,
+ const size_t numOpsKilled,
+ const size_t numOpsRunning) const final;
+
private:
ServiceContext* const _service;
};
diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp
index ae022c6e09e..fa82b1aee09 100644
--- a/src/mongo/db/repl/rollback_impl.cpp
+++ b/src/mongo/db/repl/rollback_impl.cpp
@@ -273,13 +273,13 @@ bool RollbackImpl::_isInShutdown() const {
return _inShutdown;
}
-namespace {
-void killAllUserOperations(OperationContext* opCtx) {
+void RollbackImpl::_killAllUserOperations(OperationContext* opCtx) {
invariant(opCtx);
ServiceContext* serviceCtx = opCtx->getServiceContext();
invariant(serviceCtx);
int numOpsKilled = 0;
+ int numOpsRunning = 0;
for (ServiceContext::LockedClientsCursor cursor(serviceCtx); Client* client = cursor.next();) {
stdx::lock_guard<Client> lk(*client);
@@ -297,12 +297,17 @@ void killAllUserOperations(OperationContext* opCtx) {
if (toKill && !toKill->isKillPending()) {
serviceCtx->killOperation(lk, toKill, ErrorCodes::InterruptedDueToReplStateChange);
numOpsKilled++;
+ } else {
+ numOpsRunning++;
}
}
- log() << "Killed {} operation(s) while transitioning to ROLLBACK"_format(numOpsKilled);
+ // Update the metrics for tracking user operations during state transitions.
+ _replicationCoordinator->updateAndLogStateTransitionMetrics(
+ ReplicationCoordinator::OpsKillingStateTransitionEnum::kRollback,
+ numOpsKilled,
+ numOpsRunning);
}
-} // namespace
Status RollbackImpl::_transitionToRollback(OperationContext* opCtx) {
invariant(opCtx);
@@ -318,7 +323,7 @@ Status RollbackImpl::_transitionToRollback(OperationContext* opCtx) {
// Kill all user operations to ensure we can successfully acquire the RSTL. Since the node
// must be a secondary, this is only killing readers, whose connections will be closed
// shortly regardless.
- killAllUserOperations(opCtx);
+ _killAllUserOperations(opCtx);
rstlLock.waitForLockUntil(Date_t::max());
diff --git a/src/mongo/db/repl/rollback_impl.h b/src/mongo/db/repl/rollback_impl.h
index 424b394fa95..517db073dd0 100644
--- a/src/mongo/db/repl/rollback_impl.h
+++ b/src/mongo/db/repl/rollback_impl.h
@@ -348,6 +348,12 @@ private:
OperationContext* opCtx, RollBackLocalOperations::RollbackCommonPoint commonPoint) const;
/**
+ * Kills all user operations currently being performed. Since this node is a secondary, these
+ * operations are all reads.
+ */
+ void _killAllUserOperations(OperationContext* opCtx);
+
+ /**
* Uses the ReplicationCoordinator to transition the current member state to ROLLBACK.
* If the transition to ROLLBACK fails, this could mean that we have been elected PRIMARY. In
* this case, we return a NotSecondary error.
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index 8b64e1a6bc8..64b73e6008d 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -99,8 +99,8 @@ namespace mongo {
MONGO_FAIL_POINT_DEFINE(rsStopGetMore);
MONGO_FAIL_POINT_DEFINE(respondWithNotPrimaryInCommandDispatch);
MONGO_FAIL_POINT_DEFINE(skipCheckingForNotMasterInCommandDispatch);
-MONGO_FAIL_POINT_DEFINE(waitAfterReadCommandFinishesExecution);
MONGO_FAIL_POINT_DEFINE(sleepMillisAfterCommandExecutionBegins);
+MONGO_FAIL_POINT_DEFINE(waitAfterCommandFinishesExecution);
// Tracks the number of times a legacy unacknowledged write failed due to
// not master error resulted in network disconnection.
@@ -587,18 +587,23 @@ bool runCommandImpl(OperationContext* opCtx,
}
}
- // This failpoint should affect both getMores and commands which are read-only and thus don't
- // support writeConcern.
- if (!shouldWaitForWriteConcern || command->getLogicalOp() == LogicalOp::opGetMore) {
- MONGO_FAIL_POINT_BLOCK(waitAfterReadCommandFinishesExecution, options) {
- const BSONObj& data = options.getData();
- auto db = data["db"].str();
- if (db.empty() || request.getDatabase() == db) {
- CurOpFailpointHelpers::waitWhileFailPointEnabled(
- &waitAfterReadCommandFinishesExecution,
- opCtx,
- "waitAfterReadCommandFinishesExecution");
- }
+ // This fail point blocks all commands which are running on the specified namespace, or which
+ // are present in the given list of commands. If no namespace or command list are provided, then
+ // the fail point will block all commands.
+ MONGO_FAIL_POINT_BLOCK(waitAfterCommandFinishesExecution, options) {
+ const BSONObj& data = options.getData();
+ auto ns = data["ns"].valueStringDataSafe();
+ auto commands =
+ data.hasField("commands") ? data["commands"].Array() : std::vector<BSONElement>();
+
+ // If 'ns' or 'commands' is not set, block for all the namespaces or commands respectively.
+ if ((ns.empty() || invocation->ns().ns() == ns) &&
+ (commands.empty() ||
+ std::any_of(commands.begin(), commands.end(), [&request](auto& element) {
+ return element.valueStringDataSafe() == request.getCommandName();
+ }))) {
+ CurOpFailpointHelpers::waitWhileFailPointEnabled(
+ &waitAfterCommandFinishesExecution, opCtx, "waitAfterCommandFinishesExecution");
}
}
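For context, the renamed fail point is driven through the standard configureFailPoint mechanism; its data payload matches the fields this block reads, for instance (namespace and command names are illustrative):

    data: { ns: "test.coll", commands: ["insert", "update"] }

Omitting both fields makes the fail point apply to every command, per the comment above.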
diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp
index be0946a5058..62a60c2bad6 100644
--- a/src/mongo/embedded/replication_coordinator_embedded.cpp
+++ b/src/mongo/embedded/replication_coordinator_embedded.cpp
@@ -492,5 +492,12 @@ void ReplicationCoordinatorEmbedded::attemptToAdvanceStableTimestamp() {
UASSERT_NOT_IMPLEMENTED;
}
+void ReplicationCoordinatorEmbedded::updateAndLogStateTransitionMetrics(
+ const ReplicationCoordinator::OpsKillingStateTransitionEnum stateTransition,
+ const size_t numOpsKilled,
+ const size_t numOpsRunning) const {
+ UASSERT_NOT_IMPLEMENTED;
+}
+
} // namespace embedded
} // namespace mongo
diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h
index 8d7788a0f41..8b208e09f4d 100644
--- a/src/mongo/embedded/replication_coordinator_embedded.h
+++ b/src/mongo/embedded/replication_coordinator_embedded.h
@@ -263,6 +263,11 @@ public:
void attemptToAdvanceStableTimestamp() override;
+ void updateAndLogStateTransitionMetrics(
+ const ReplicationCoordinator::OpsKillingStateTransitionEnum stateTransition,
+ const size_t numOpsKilled,
+ const size_t numOpsRunning) const override;
+
private:
// Back pointer to the ServiceContext that has started the instance.
ServiceContext* const _service;