summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/sharding/resharding_generate_oplog_entries.js3
-rw-r--r--src/mongo/db/s/SConscript1
-rw-r--r--src/mongo/db/s/collection_sharding_runtime.cpp8
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service.cpp10
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service.h3
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_service.cpp16
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_service.h5
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics_helpers.cpp92
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics_helpers.h2
-rw-r--r--src/mongo/db/s/sharding_data_transform_instance_metrics.cpp38
-rw-r--r--src/mongo/db/s/sharding_data_transform_instance_metrics.h9
-rw-r--r--src/mongo/db/s/sharding_data_transform_instance_metrics_test.cpp44
-rw-r--r--src/mongo/db/service_entry_point_common.cpp3
-rw-r--r--src/mongo/db/service_entry_point_common.h4
-rw-r--r--src/mongo/db/service_entry_point_mongod.cpp6
-rw-r--r--src/mongo/embedded/service_entry_point_embedded.cpp4
-rw-r--r--src/mongo/s/stale_exception.h14
17 files changed, 252 insertions, 10 deletions
diff --git a/jstests/sharding/resharding_generate_oplog_entries.js b/jstests/sharding/resharding_generate_oplog_entries.js
index 026decca0e5..2e1815c81ca 100644
--- a/jstests/sharding/resharding_generate_oplog_entries.js
+++ b/jstests/sharding/resharding_generate_oplog_entries.js
@@ -44,6 +44,7 @@ function simulateResharding() {
let donorReshardingFields = {
"uuid": uuid,
"state": "preparing-to-donate",
+ "startTime": new Date(),
"donorFields": {
"tempNs": tempReshardingNss,
"reshardingKey": {y: 1},
@@ -249,4 +250,4 @@ jsTestLog("oplog: " + tojson(oplogEntries));
jsTestLog("oplog strict: " + tostrictjson(oplogEntries));
st.stop();
-})(); \ No newline at end of file
+})();
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index f6befa3d2c3..037e1a7dc03 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -93,6 +93,7 @@ env.Library(
'resharding/resharding_donor_service.cpp',
'resharding/resharding_future_util.cpp',
'resharding/resharding_manual_cleanup.cpp',
+ 'resharding/resharding_metrics_helpers.cpp',
'resharding/resharding_metrics.cpp',
'resharding/resharding_metrics_new.cpp',
'resharding/resharding_op_observer.cpp',
diff --git a/src/mongo/db/s/collection_sharding_runtime.cpp b/src/mongo/db/s/collection_sharding_runtime.cpp
index a39d3ea92e9..4f908259d3c 100644
--- a/src/mongo/db/s/collection_sharding_runtime.cpp
+++ b/src/mongo/db/s/collection_sharding_runtime.cpp
@@ -34,7 +34,10 @@
#include "mongo/base/checked_cast.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/global_settings.h"
+#include "mongo/db/repl/primary_only_service.h"
#include "mongo/db/s/operation_sharding_state.h"
+#include "mongo/db/s/resharding/resharding_donor_recipient_common.h"
+#include "mongo/db/s/sharding_data_transform_metrics.h"
#include "mongo/db/s/sharding_runtime_d_params_gen.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/logv2/log.h"
@@ -357,7 +360,10 @@ CollectionShardingRuntime::_getMetadataWithVersionCheckAt(
receivedShardVersion,
boost::none /* wantedVersion */,
ShardingState::get(opCtx)->shardId(),
- std::move(criticalSectionSignal)),
+ std::move(criticalSectionSignal),
+ opCtx->lockState()->isWriteLocked()
+ ? StaleConfigInfo::OperationType::kWrite
+ : StaleConfigInfo::OperationType::kRead),
str::stream() << "The critical section for " << _nss.ns()
<< " is acquired with reason: " << reason,
!criticalSectionSignal);
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
index 99f98abf0a7..4b48ec91761 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
@@ -977,6 +977,10 @@ ReshardingCoordinatorService::ReshardingCoordinator::ReshardingCoordinator(
: PrimaryOnlyService::TypedInstance<ReshardingCoordinator>(),
_id(coordinatorDoc.getReshardingUUID().toBSON()),
_coordinatorService(coordinatorService),
+ _metricsNew{
+ ShardingDataTransformMetrics::isEnabled()
+ ? ReshardingMetricsNew::initializeFrom(coordinatorDoc, getGlobalServiceContext())
+ : nullptr},
_metadata(coordinatorDoc.getCommonReshardingMetadata()),
_coordinatorDoc(coordinatorDoc),
_markKilledExecutor(std::make_shared<ThreadPool>([] {
@@ -1259,6 +1263,9 @@ ReshardingCoordinatorService::ReshardingCoordinator::_commitAndFinishReshardOper
})
.then([this, executor] { return _awaitAllParticipantShardsDone(executor); })
.then([this, executor] {
+ if (ShardingDataTransformMetrics::isEnabled()) {
+ _metricsNew->onCriticalSectionEnd();
+ }
// Best-effort attempt to trigger a refresh on the participant shards so
// they see the collection metadata without reshardingFields and no longer
// throw ReshardCollectionInProgress. There is no guarantee this logic ever
@@ -1685,6 +1692,9 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllRecipientsFinished
this->_updateCoordinatorDocStateAndCatalogEntries(CoordinatorStateEnum::kBlockingWrites,
_coordinatorDoc);
+ if (ShardingDataTransformMetrics::isEnabled()) {
+ _metricsNew->onCriticalSectionBegin();
+ }
})
.then([this] { return _waitForMajority(_ctHolder->getAbortToken()); })
.thenRunOn(**executor)
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.h b/src/mongo/db/s/resharding/resharding_coordinator_service.h
index a24569ecc44..280abbac2f3 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.h
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.h
@@ -33,6 +33,7 @@
#include "mongo/db/repl/primary_only_service.h"
#include "mongo/db/s/resharding/coordinator_document_gen.h"
#include "mongo/db/s/resharding/resharding_coordinator_observer.h"
+#include "mongo/db/s/resharding/resharding_metrics_new.h"
#include "mongo/platform/mutex.h"
#include "mongo/s/catalog/type_chunk.h"
#include "mongo/s/catalog/type_collection.h"
@@ -506,6 +507,8 @@ private:
// The primary-only service instance corresponding to the coordinator instance. Not owned.
const ReshardingCoordinatorService* const _coordinatorService;
+ std::unique_ptr<ReshardingMetricsNew> _metricsNew;
+
// The in-memory representation of the immutable portion of the document in
// config.reshardingOperations.
const CommonReshardingMetadata _metadata;
diff --git a/src/mongo/db/s/resharding/resharding_donor_service.cpp b/src/mongo/db/s/resharding/resharding_donor_service.cpp
index 722cd4453ce..2eff19e9c0f 100644
--- a/src/mongo/db/s/resharding/resharding_donor_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_service.cpp
@@ -204,6 +204,9 @@ ReshardingDonorService::DonorStateMachine::DonorStateMachine(
std::unique_ptr<DonorStateMachineExternalState> externalState)
: repl::PrimaryOnlyService::TypedInstance<DonorStateMachine>(),
_donorService(donorService),
+ _metricsNew{ShardingDataTransformMetrics::isEnabled()
+ ? ReshardingMetricsNew::initializeFrom(donorDoc, getGlobalServiceContext())
+ : nullptr},
_metadata{donorDoc.getCommonReshardingMetadata()},
_recipientShardIds{donorDoc.getRecipientShards()},
_donorCtx{donorDoc.getMutableState()},
@@ -378,6 +381,9 @@ ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::_finishReshardin
ShardingCatalogClient::kLocalWriteConcern);
_metrics()->leaveCriticalSection(getCurrentTime());
+ if (ShardingDataTransformMetrics::isEnabled()) {
+ _metricsNew->onCriticalSectionEnd();
+ }
}
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
@@ -512,6 +518,13 @@ void ReshardingDonorService::DonorStateMachine::onReshardingFieldsChanges(
}
}
+void ReshardingDonorService::DonorStateMachine::onWriteDuringCriticalSection() {
+ if (!ShardingDataTransformMetrics::isEnabled()) {
+ return;
+ }
+ _metricsNew->onWriteDuringCriticalSection();
+}
+
SharedSemiFuture<void> ReshardingDonorService::DonorStateMachine::awaitCriticalSectionAcquired() {
return _critSecWasAcquired.getFuture();
}
@@ -673,6 +686,9 @@ void ReshardingDonorService::DonorStateMachine::
ShardingCatalogClient::kLocalWriteConcern);
_metrics()->enterCriticalSection(getCurrentTime());
+ if (ShardingDataTransformMetrics::isEnabled()) {
+ _metricsNew->onCriticalSectionBegin();
+ }
}
{
diff --git a/src/mongo/db/s/resharding/resharding_donor_service.h b/src/mongo/db/s/resharding/resharding_donor_service.h
index b50c88b6af5..6b7316150f7 100644
--- a/src/mongo/db/s/resharding/resharding_donor_service.h
+++ b/src/mongo/db/s/resharding/resharding_donor_service.h
@@ -33,6 +33,7 @@
#include "mongo/db/repl/primary_only_service.h"
#include "mongo/db/s/resharding/donor_document_gen.h"
#include "mongo/db/s/resharding/resharding_metrics.h"
+#include "mongo/db/s/resharding/resharding_metrics_new.h"
#include "mongo/s/resharding/type_collection_fields_gen.h"
namespace mongo {
@@ -101,6 +102,8 @@ public:
void onReshardingFieldsChanges(OperationContext* opCtx,
const TypeCollectionReshardingFields& reshardingFields);
+ void onWriteDuringCriticalSection();
+
SharedSemiFuture<void> awaitCriticalSectionAcquired();
SharedSemiFuture<void> awaitCriticalSectionPromoted();
@@ -221,6 +224,8 @@ private:
// The primary-only service instance corresponding to the donor instance. Not owned.
const ReshardingDonorService* const _donorService;
+ std::unique_ptr<ReshardingMetricsNew> _metricsNew;
+
// The in-memory representation of the immutable portion of the document in
// config.localReshardingOperations.donor.
const CommonReshardingMetadata _metadata;
diff --git a/src/mongo/db/s/resharding/resharding_metrics_helpers.cpp b/src/mongo/db/s/resharding/resharding_metrics_helpers.cpp
new file mode 100644
index 00000000000..a056596d2e7
--- /dev/null
+++ b/src/mongo/db/s/resharding/resharding_metrics_helpers.cpp
@@ -0,0 +1,92 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kResharding
+
+#include "mongo/db/s/resharding/resharding_metrics_helpers.h"
+#include "mongo/db/catalog_raii.h"
+#include "mongo/db/s/collection_sharding_runtime.h"
+#include "mongo/db/s/resharding/resharding_donor_recipient_common.h"
+#include "mongo/logv2/log.h"
+
+namespace mongo {
+namespace resharding_metrics {
+
+namespace {
+void onCriticalSectionErrorThrows(OperationContext* opCtx, const StaleConfigInfo& info) {
+ if (!ShardingDataTransformMetrics::isEnabled()) {
+ return;
+ }
+ const auto& operationType = info.getDuringOperationType();
+ if (!operationType) {
+ return;
+ }
+ AutoGetCollection autoColl(opCtx, info.getNss(), MODE_IS);
+ auto csr = CollectionShardingRuntime::get(opCtx, info.getNss());
+ auto metadata = csr->getCurrentMetadataIfKnown();
+ if (!metadata || !metadata->isSharded()) {
+ return;
+ }
+ const auto& reshardingFields = metadata->getReshardingFields();
+ if (!reshardingFields) {
+ return;
+ }
+ auto stateMachine =
+ resharding::tryGetReshardingStateMachine<ReshardingDonorService,
+ ReshardingDonorService::DonorStateMachine,
+ ReshardingDonorDocument>(
+ opCtx, reshardingFields->getReshardingUUID());
+ if (!stateMachine) {
+ return;
+ }
+ switch (*operationType) {
+ case StaleConfigInfo::OperationType::kWrite:
+ (*stateMachine)->onWriteDuringCriticalSection();
+ return;
+ case StaleConfigInfo::OperationType::kRead:
+ return;
+ }
+}
+} // namespace
+
+
+void onCriticalSectionError(OperationContext* opCtx, const StaleConfigInfo& info) noexcept {
+ try {
+ onCriticalSectionErrorThrows(opCtx, info);
+ } catch (const DBException& e) {
+ LOGV2(6437201,
+ "Exception while recording resharding metrics after an operation failure due to the "
+ "critical section being held",
+ "Status"_attr = redact(e.toStatus()));
+ }
+}
+
+} // namespace resharding_metrics
+
+} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_metrics_helpers.h b/src/mongo/db/s/resharding/resharding_metrics_helpers.h
index b46a11ad679..d972e277701 100644
--- a/src/mongo/db/s/resharding/resharding_metrics_helpers.h
+++ b/src/mongo/db/s/resharding/resharding_metrics_helpers.h
@@ -70,6 +70,8 @@ inline constexpr ShardingDataTransformMetrics::Role getRoleForStateDocument() {
MONGO_UNREACHABLE;
}
+void onCriticalSectionError(OperationContext* opCtx, const StaleConfigInfo& info) noexcept;
+
} // namespace resharding_metrics
} // namespace mongo
diff --git a/src/mongo/db/s/sharding_data_transform_instance_metrics.cpp b/src/mongo/db/s/sharding_data_transform_instance_metrics.cpp
index 27cc2e7e238..1f14094cca5 100644
--- a/src/mongo/db/s/sharding_data_transform_instance_metrics.cpp
+++ b/src/mongo/db/s/sharding_data_transform_instance_metrics.cpp
@@ -39,6 +39,10 @@ constexpr auto TEMP_VALUE = "placeholder";
namespace mongo {
+namespace {
+constexpr auto kNoDate = Date_t::min();
+}
+
ShardingDataTransformInstanceMetrics::ShardingDataTransformInstanceMetrics(
UUID instanceId,
BSONObj originalCommand,
@@ -78,7 +82,10 @@ ShardingDataTransformInstanceMetrics::ShardingDataTransformInstanceMetrics(
_insertsApplied{0},
_updatesApplied{0},
_deletesApplied{0},
- _oplogEntriesApplied{0} {}
+ _oplogEntriesApplied{0},
+ _criticalSectionStartTime{kNoDate},
+ _criticalSectionEndTime{kNoDate},
+ _writesDuringCriticalSection{0} {}
ShardingDataTransformInstanceMetrics::~ShardingDataTransformInstanceMetrics() {
if (_deregister) {
@@ -133,12 +140,12 @@ BSONObj ShardingDataTransformInstanceMetrics::reportForCurrentOp() const noexcep
builder.append(kCoordinatorState, getStateString());
builder.append(kApplyTimeElapsed, TEMP_VALUE);
builder.append(kCopyTimeElapsed, TEMP_VALUE);
- builder.append(kCriticalSectionTimeElapsed, TEMP_VALUE);
+ builder.append(kCriticalSectionTimeElapsed, getCriticalSectionElapsedTimeSecs());
break;
case Role::kDonor:
builder.append(kDonorState, getStateString());
- builder.append(kCriticalSectionTimeElapsed, TEMP_VALUE);
- builder.append(kCountWritesDuringCriticalSection, TEMP_VALUE);
+ builder.append(kCriticalSectionTimeElapsed, getCriticalSectionElapsedTimeSecs());
+ builder.append(kCountWritesDuringCriticalSection, _writesDuringCriticalSection.load());
builder.append(kCountReadsDuringCriticalSection, TEMP_VALUE);
break;
case Role::kRecipient:
@@ -180,9 +187,32 @@ void ShardingDataTransformInstanceMetrics::onOplogEntriesApplied(int64_t numEntr
_oplogEntriesApplied.addAndFetch(numEntries);
}
+void ShardingDataTransformInstanceMetrics::onWriteDuringCriticalSection() {
+ _writesDuringCriticalSection.addAndFetch(1);
+}
+
+void ShardingDataTransformInstanceMetrics::onCriticalSectionBegin() {
+ _criticalSectionStartTime.store(_clockSource->now());
+}
+
+void ShardingDataTransformInstanceMetrics::onCriticalSectionEnd() {
+ _criticalSectionEndTime.store(_clockSource->now());
+}
inline int64_t ShardingDataTransformInstanceMetrics::getOperationRunningTimeSecs() const {
return durationCount<Seconds>(_clockSource->now() - _startTime);
}
+int64_t ShardingDataTransformInstanceMetrics::getCriticalSectionElapsedTimeSecs() const {
+ auto start = _criticalSectionStartTime.load();
+ if (start == kNoDate) {
+ return 0;
+ }
+ auto end = _criticalSectionEndTime.load();
+ if (end == kNoDate) {
+ end = _clockSource->now();
+ }
+ return durationCount<Seconds>(end - start);
+}
+
} // namespace mongo
diff --git a/src/mongo/db/s/sharding_data_transform_instance_metrics.h b/src/mongo/db/s/sharding_data_transform_instance_metrics.h
index 189f7819036..77fd973458e 100644
--- a/src/mongo/db/s/sharding_data_transform_instance_metrics.h
+++ b/src/mongo/db/s/sharding_data_transform_instance_metrics.h
@@ -68,6 +68,9 @@ public:
void onUpdateApplied();
void onDeleteApplied();
void onOplogEntriesApplied(int64_t numEntries);
+ void onWriteDuringCriticalSection();
+ void onCriticalSectionBegin();
+ void onCriticalSectionEnd();
Role getRole() const;
protected:
@@ -109,6 +112,7 @@ protected:
private:
inline int64_t getOperationRunningTimeSecs() const;
+ int64_t getCriticalSectionElapsedTimeSecs() const;
ClockSource* _clockSource;
ObserverPtr _observer;
@@ -116,10 +120,15 @@ private:
ShardingDataTransformCumulativeMetrics::DeregistrationFunction _deregister;
const Date_t _startTime;
+
AtomicWord<int64_t> _insertsApplied;
AtomicWord<int64_t> _updatesApplied;
AtomicWord<int64_t> _deletesApplied;
AtomicWord<int64_t> _oplogEntriesApplied;
+
+ AtomicWord<Date_t> _criticalSectionStartTime;
+ AtomicWord<Date_t> _criticalSectionEndTime;
+ AtomicWord<int64_t> _writesDuringCriticalSection;
};
} // namespace mongo
diff --git a/src/mongo/db/s/sharding_data_transform_instance_metrics_test.cpp b/src/mongo/db/s/sharding_data_transform_instance_metrics_test.cpp
index 6c6a2b0c3d2..0ffa17ea88e 100644
--- a/src/mongo/db/s/sharding_data_transform_instance_metrics_test.cpp
+++ b/src/mongo/db/s/sharding_data_transform_instance_metrics_test.cpp
@@ -178,7 +178,6 @@ TEST_F(ShardingDataTransformInstanceMetricsTest, OnDeleteAppliedShouldIncrementD
ASSERT_EQ(report.getIntField("deletesApplied"), 1);
}
-
TEST_F(ShardingDataTransformInstanceMetricsTest,
OnOplogsEntriesAppliedShouldIncrementOplogsEntriesApplied) {
auto metrics = createInstanceMetrics(UUID::gen(), Role::kRecipient);
@@ -191,6 +190,49 @@ TEST_F(ShardingDataTransformInstanceMetricsTest,
ASSERT_EQ(report.getIntField("oplogEntriesApplied"), 100);
}
+TEST_F(ShardingDataTransformInstanceMetricsTest, DonorIncrementWritesDuringCriticalSection) {
+ auto metrics = createInstanceMetrics(UUID::gen(), Role::kDonor);
+
+ auto report = metrics->reportForCurrentOp();
+ ASSERT_EQ(report.getIntField("countWritesDuringCriticalSection"), 0);
+ metrics->onWriteDuringCriticalSection();
+
+ report = metrics->reportForCurrentOp();
+ ASSERT_EQ(report.getIntField("countWritesDuringCriticalSection"), 1);
+}
+
+TEST_F(ShardingDataTransformInstanceMetricsTest, CurrentOpReportsCriticalSectionTime) {
+ const auto roles = {Role::kDonor, Role::kCoordinator};
+ for (const auto& role : roles) {
+ LOGV2(6437200, "CurrentOpReportsCriticalSectionTime", "Role"_attr = role);
+ constexpr auto kFiveSeconds = Milliseconds(5000);
+ auto uuid = UUID::gen();
+ const auto& clock = getClockSource();
+ auto metrics = std::make_unique<ShardingDataTransformInstanceMetrics>(
+ uuid, kTestCommand, kTestNamespace, role, clock->now(), clock, &_cumulativeMetrics);
+
+ // Reports 0 before critical section entered.
+ clock->advance(kFiveSeconds);
+ auto report = metrics->reportForCurrentOp();
+ ASSERT_EQ(report.getIntField("totalCriticalSectionTimeElapsedSecs"), 0);
+
+ // Reports time so far during critical section.
+ metrics->onCriticalSectionBegin();
+ clock->advance(kFiveSeconds);
+ report = metrics->reportForCurrentOp();
+ ASSERT_EQ(report.getIntField("totalCriticalSectionTimeElapsedSecs"), 5);
+ clock->advance(kFiveSeconds);
+ report = metrics->reportForCurrentOp();
+ ASSERT_EQ(report.getIntField("totalCriticalSectionTimeElapsedSecs"), 10);
+
+ // Still reports total time after critical section ends.
+ metrics->onCriticalSectionEnd();
+ clock->advance(kFiveSeconds);
+ report = metrics->reportForCurrentOp();
+ ASSERT_EQ(report.getIntField("totalCriticalSectionTimeElapsedSecs"), 10);
+ }
+}
+
TEST_F(ShardingDataTransformInstanceMetricsTest, CurrentOpReportsRunningTime) {
auto uuid = UUID::gen();
auto now = getClockSource()->now();
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index 9093d80ddc4..be2b96c0657 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -77,6 +77,7 @@
#include "mongo/db/repl/tenant_migration_access_blocker_util.h"
#include "mongo/db/request_execution_context.h"
#include "mongo/db/s/operation_sharding_state.h"
+#include "mongo/db/s/resharding/resharding_metrics_helpers.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/db/s/sharding_statistics.h"
#include "mongo/db/s/transaction_coordinator_factory.h"
@@ -1688,6 +1689,8 @@ Future<void> ExecCommandDatabase::_commandExec() {
!_refreshedCollection) {
if (auto sce = s.extraInfo<StaleConfigInfo>()) {
if (sce->getCriticalSectionSignal()) {
+ _execContext->behaviors->handleReshardingCriticalSectionMetrics(opCtx,
+ *sce);
// The shard is in a critical section, so we cannot retry locally
OperationShardingState::waitForCriticalSectionToComplete(
opCtx, *sce->getCriticalSectionSignal())
diff --git a/src/mongo/db/service_entry_point_common.h b/src/mongo/db/service_entry_point_common.h
index b38ea9b61bd..9fa9dfa886c 100644
--- a/src/mongo/db/service_entry_point_common.h
+++ b/src/mongo/db/service_entry_point_common.h
@@ -93,6 +93,10 @@ struct ServiceEntryPointCommon {
OperationContext* opCtx, const ShardCannotRefreshDueToLocksHeldInfo& refreshInfo) const
noexcept = 0;
+ virtual void handleReshardingCriticalSectionMetrics(OperationContext* opCtx,
+ const StaleConfigInfo& se) const
+ noexcept = 0;
+
virtual void resetLockerState(OperationContext* opCtx) const noexcept = 0;
MONGO_WARN_UNUSED_RESULT_FUNCTION virtual std::unique_ptr<PolymorphicScoped>
diff --git a/src/mongo/db/service_entry_point_mongod.cpp b/src/mongo/db/service_entry_point_mongod.cpp
index 991a099b18b..df50322d6c2 100644
--- a/src/mongo/db/service_entry_point_mongod.cpp
+++ b/src/mongo/db/service_entry_point_mongod.cpp
@@ -41,6 +41,7 @@
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/speculative_majority_read_info.h"
#include "mongo/db/s/operation_sharding_state.h"
+#include "mongo/db/s/resharding/resharding_metrics_helpers.h"
#include "mongo/db/s/scoped_operation_completion_sharding_actions.h"
#include "mongo/db/s/shard_filtering_metadata_refresh.h"
#include "mongo/db/s/sharding_state.h"
@@ -254,6 +255,11 @@ public:
.isOK();
}
+ void handleReshardingCriticalSectionMetrics(OperationContext* opCtx,
+ const StaleConfigInfo& se) const noexcept override {
+ resharding_metrics::onCriticalSectionError(opCtx, se);
+ }
+
// The refreshDatabase, refreshCollection, and refreshCatalogCache methods may have modified the
// locker state, in particular the flags which say if the operation took a write lock or shared
// lock. This will cause mongod to perhaps erroneously check for write concern when no writes
diff --git a/src/mongo/embedded/service_entry_point_embedded.cpp b/src/mongo/embedded/service_entry_point_embedded.cpp
index 927d2e5d2de..b02c13c8421 100644
--- a/src/mongo/embedded/service_entry_point_embedded.cpp
+++ b/src/mongo/embedded/service_entry_point_embedded.cpp
@@ -137,6 +137,10 @@ public:
return false;
}
+ void handleReshardingCriticalSectionMetrics(OperationContext* opCtx,
+ const StaleConfigInfo& se) const noexcept override {
+ }
+
void resetLockerState(OperationContext* opCtx) const noexcept override {}
std::unique_ptr<PolymorphicScoped> scopedOperationCompletionShardingActions(
diff --git a/src/mongo/s/stale_exception.h b/src/mongo/s/stale_exception.h
index d82b18bab52..52b440760e5 100644
--- a/src/mongo/s/stale_exception.h
+++ b/src/mongo/s/stale_exception.h
@@ -40,17 +40,20 @@ namespace mongo {
class StaleConfigInfo final : public ErrorExtraInfo {
public:
static constexpr auto code = ErrorCodes::StaleConfig;
+ enum class OperationType { kRead, kWrite };
StaleConfigInfo(NamespaceString nss,
ChunkVersion received,
boost::optional<ChunkVersion> wanted,
ShardId shardId,
- boost::optional<SharedSemiFuture<void>> criticalSectionSignal = boost::none)
+ boost::optional<SharedSemiFuture<void>> criticalSectionSignal = boost::none,
+ boost::optional<OperationType> duringOperationType = boost::none)
: _nss(std::move(nss)),
_received(received),
_wanted(wanted),
_shardId(shardId),
- _criticalSectionSignal(std::move(criticalSectionSignal)) {}
+ _criticalSectionSignal(std::move(criticalSectionSignal)),
+ _duringOperationType{duringOperationType} {}
const auto& getNss() const {
return _nss;
@@ -72,6 +75,10 @@ public:
return _criticalSectionSignal;
}
+ const auto& getDuringOperationType() const {
+ return _duringOperationType;
+ }
+
void serialize(BSONObjBuilder* bob) const;
static std::shared_ptr<const ErrorExtraInfo> parse(const BSONObj& obj);
@@ -81,8 +88,9 @@ protected:
boost::optional<ChunkVersion> _wanted;
ShardId _shardId;
- // This signal does not get serialized and therefore does not get propagated to the router
+ // The fields below are not serialized and therefore do not get propagated to the router
boost::optional<SharedSemiFuture<void>> _criticalSectionSignal;
+ boost::optional<OperationType> _duringOperationType;
};
class StaleEpochInfo final : public ErrorExtraInfo {