summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/catalog/collection.h6
-rw-r--r--src/mongo/db/commands/set_feature_compatibility_version_command.cpp10
-rw-r--r--src/mongo/db/s/SConscript3
-rw-r--r--src/mongo/db/s/balancer_stats_registry.cpp286
-rw-r--r--src/mongo/db/s/balancer_stats_registry.h115
-rw-r--r--src/mongo/db/s/get_stats_for_balancing.idl72
-rw-r--r--src/mongo/db/s/migration_coordinator.cpp5
-rw-r--r--src/mongo/db/s/migration_destination_manager.cpp7
-rw-r--r--src/mongo/db/s/migration_destination_manager.h1
-rw-r--r--src/mongo/db/s/migration_util.cpp2
-rw-r--r--src/mongo/db/s/migration_util.h2
-rw-r--r--src/mongo/db/s/migration_util_test.cpp8
-rw-r--r--src/mongo/db/s/range_deletion_util.cpp3
-rw-r--r--src/mongo/db/s/shard_server_op_observer.cpp36
-rw-r--r--src/mongo/db/s/shardsvr_get_stats_for_balancing_command.cpp145
15 files changed, 687 insertions, 14 deletions
diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h
index 47384e3d346..f4799a02f81 100644
--- a/src/mongo/db/catalog/collection.h
+++ b/src/mongo/db/catalog/collection.h
@@ -739,6 +739,9 @@ public:
virtual long long numRecords(OperationContext* opCtx) const = 0;
+ /**
+ * Return uncompressed collection data size in bytes
+ */
virtual long long dataSize(OperationContext* opCtx) const = 0;
@@ -747,6 +750,9 @@ public:
*/
virtual bool isEmpty(OperationContext* opCtx) const = 0;
+ /**
+ * Return the average object size in bytes
+ */
virtual int averageObjectSize(OperationContext* opCtx) const = 0;
virtual uint64_t getIndexSize(OperationContext* opCtx,
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index 0212a664965..6c20e3ae89c 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -394,9 +394,9 @@ public:
if (request.getPhase() == SetFCVPhaseEnum::kStart) {
invariant(serverGlobalParams.clusterRole == ClusterRole::ShardServer);
- // TODO SERVER-64162 Destroy the BalancerStatsRegistry
if (actualVersion > requestedVersion &&
!feature_flags::gOrphanTracking.isEnabledOnVersion(requestedVersion)) {
+ BalancerStatsRegistry::get(opCtx)->terminate();
ScopedRangeDeleterLock rangeDeleterLock(opCtx);
clearOrphanCountersFromRangeDeletionTasks(opCtx);
}
@@ -466,9 +466,11 @@ public:
migrationutil::drainMigrationsPendingRecovery(opCtx);
if (orphanTrackingCondition) {
- // TODO SERVER-64162 Initialize the BalancerStatsRegistry
- ScopedRangeDeleterLock rangeDeleterLock(opCtx);
- setOrphanCountersOnRangeDeletionTasks(opCtx);
+ {
+ ScopedRangeDeleterLock rangeDeleterLock(opCtx);
+ setOrphanCountersOnRangeDeletionTasks(opCtx);
+ }
+ BalancerStatsRegistry::get(opCtx)->initializeAsync(opCtx);
}
}
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 96f60ade98d..c407df26173 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -47,6 +47,7 @@ env.Library(
'auto_split_vector.cpp',
'chunk_move_write_concern_options.cpp',
'chunk_split_state_driver.cpp',
+ 'balancer_stats_registry.cpp',
'chunk_splitter.cpp',
'collection_critical_section_document.idl',
'collection_sharding_runtime.cpp',
@@ -370,6 +371,7 @@ env.Library(
'flush_routing_table_cache_updates_command.cpp',
'get_database_version_command.cpp',
'get_shard_version_command.cpp',
+ 'get_stats_for_balancing.idl',
'merge_chunks_command.cpp',
'migration_chunk_cloner_source_legacy_commands.cpp',
'migration_destination_manager_legacy_commands.cpp',
@@ -409,6 +411,7 @@ env.Library(
'shardsvr_drop_collection_participant_command.cpp',
'shardsvr_drop_database_command.cpp',
'shardsvr_drop_database_participant_command.cpp',
+ 'shardsvr_get_stats_for_balancing_command.cpp',
'shardsvr_move_primary_command.cpp',
'shardsvr_move_range_command.cpp',
'shardsvr_participant_block_command.cpp',
diff --git a/src/mongo/db/s/balancer_stats_registry.cpp b/src/mongo/db/s/balancer_stats_registry.cpp
new file mode 100644
index 00000000000..f96e5e3b3bb
--- /dev/null
+++ b/src/mongo/db/s/balancer_stats_registry.cpp
@@ -0,0 +1,286 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
+
+#include "mongo/db/s/balancer_stats_registry.h"
+
+#include "mongo/db/dbdirectclient.h"
+#include "mongo/db/pipeline/aggregate_command_gen.h"
+#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/s/range_deletion_task_gen.h"
+#include "mongo/db/s/range_deletion_util.h"
+#include "mongo/db/service_context.h"
+#include "mongo/logv2/log.h"
+#include "mongo/s/sharding_feature_flags_gen.h"
+
+namespace mongo {
+namespace {
+
+const auto balancerStatsRegistryDecorator =
+ ServiceContext::declareDecoration<BalancerStatsRegistry>();
+
+/**
+ * Constructs the default options for the private thread pool used for asyncronous initialization
+ */
+ThreadPool::Options makeDefaultThreadPoolOptions() {
+ ThreadPool::Options options;
+ options.poolName = "BalancerStatsRegistry";
+ options.minThreads = 0;
+ options.maxThreads = 1;
+
+ // Ensure all threads have a client
+ options.onCreateThread = [](const std::string& threadName) {
+ Client::initThread(threadName.c_str());
+ stdx::lock_guard<Client> lk{cc()};
+ cc().setSystemOperationKillableByStepdown(lk);
+ };
+
+ return options;
+}
+} // namespace
+
+const ReplicaSetAwareServiceRegistry::Registerer<BalancerStatsRegistry>
+ balancerStatsRegistryRegisterer("BalancerStatsRegistry");
+
+
+BalancerStatsRegistry* BalancerStatsRegistry::get(ServiceContext* serviceContext) {
+ return &balancerStatsRegistryDecorator(serviceContext);
+}
+
+BalancerStatsRegistry* BalancerStatsRegistry::get(OperationContext* opCtx) {
+ return get(opCtx->getServiceContext());
+}
+
+void BalancerStatsRegistry::onStartup(OperationContext* opCtx) {
+ _threadPool = std::make_shared<ThreadPool>(makeDefaultThreadPoolOptions());
+ _threadPool->startup();
+}
+
+void BalancerStatsRegistry::onStepUpComplete(OperationContext* opCtx, long long term) {
+ if (!feature_flags::gOrphanTracking.isEnabled(serverGlobalParams.featureCompatibility)) {
+ // If future flag is disabled do not initialize the registry
+ return;
+ }
+ initializeAsync(opCtx);
+}
+
+void BalancerStatsRegistry::initializeAsync(OperationContext* opCtx) {
+ ExecutorFuture<void>(_threadPool)
+ .then([this] {
+ // All threads in this pool
+ auto opCtxHolder{cc().makeOperationContext()};
+ opCtxHolder->setAlwaysInterruptAtStepDownOrUp();
+ auto opCtx{opCtxHolder.get()};
+
+ if (!repl::ReplicationCoordinator::get(opCtx)->getMemberState().primary()) {
+ return;
+ }
+
+ LOGV2_DEBUG(6419601, 2, "Initializing BalancerStatsRegistry");
+ try {
+ // Lock the range deleter to prevent
+ // concurrent modifications of orphans count
+ ScopedRangeDeleterLock rangeDeleterLock(opCtx);
+ // Load current ophans count from disk
+ _loadOrphansCount(opCtx);
+ LOGV2_DEBUG(6419602, 2, "Completed BalancerStatsRegistry initialization");
+ // Start accepting updates to the cached orphan count
+ _isInitialized.store(true);
+ // Unlock the range deleter (~ScopedRangeDeleterLock)
+ } catch (const DBException& ex) {
+ LOGV2_ERROR(6419600,
+ "Failed to initialize BalancerStatsRegistry after stepUp",
+ "error"_attr = redact(ex));
+ }
+ })
+ .getAsync([](auto) {});
+}
+
+void BalancerStatsRegistry::terminate() {
+ // Wait for the asyncronous initialization to complete
+ _threadPool->waitForIdle();
+ // Prevent future usages until next stepup
+ _isInitialized.store(false);
+ // Clear the stats
+ stdx::lock_guard lk{_mutex};
+ _collStatsMap.clear();
+ LOGV2_DEBUG(6419603, 2, "BalancerStatsRegistry terminated");
+}
+
+void BalancerStatsRegistry::onStepDown() {
+ terminate();
+}
+
+long long BalancerStatsRegistry::getCollNumOrphanDocs(const UUID& collectionUUID) const {
+ if (!_isInitialized.load())
+ uasserted(ErrorCodes::NotYetInitialized, "BalancerStatsRegistry is not initialized");
+
+ stdx::lock_guard lk{_mutex};
+ auto collStats = _collStatsMap.find(collectionUUID);
+ if (collStats != _collStatsMap.end()) {
+ return collStats->second.numOrphanDocs;
+ }
+ return 0;
+}
+
+void BalancerStatsRegistry::onRangeDeletionTaskInsertion(const UUID& collectionUUID,
+ long long numOrphanDocs) {
+ if (!_isInitialized.load())
+ return;
+
+ stdx::lock_guard lk{_mutex};
+ auto& stats = _collStatsMap[collectionUUID];
+ stats.numRangeDeletionTasks += 1;
+ stats.numOrphanDocs += numOrphanDocs;
+}
+
+void BalancerStatsRegistry::onRangeDeletionTaskDeletion(const UUID& collectionUUID,
+ long long numOrphanDocs) {
+ if (!_isInitialized.load())
+ return;
+
+ stdx::lock_guard lk{_mutex};
+ auto collStatsIt = _collStatsMap.find(collectionUUID);
+ if (collStatsIt == _collStatsMap.end()) {
+ LOGV2_ERROR(6419612,
+ "Couldn't find cached range deletion tasks count during decrese attempt",
+ "collectionUUID"_attr = collectionUUID,
+ "numOrphanDocs"_attr = numOrphanDocs);
+ return;
+ }
+ auto& stats = collStatsIt->second;
+
+ stats.numRangeDeletionTasks -= 1;
+ stats.numOrphanDocs -= numOrphanDocs;
+
+ if (stats.numRangeDeletionTasks <= 0) {
+ if (MONGO_unlikely(stats.numRangeDeletionTasks < 0)) {
+ LOGV2_ERROR(6419613,
+ "Cached count of range deletion tasks became negative. Resetting it to 0",
+ "collectionUUID"_attr = collectionUUID,
+ "numRangeDeletionTasks"_attr = stats.numRangeDeletionTasks,
+ "numOrphanDocs"_attr = stats.numRangeDeletionTasks);
+ }
+ _collStatsMap.erase(collStatsIt);
+ }
+}
+
+void BalancerStatsRegistry::updateOrphansCount(const UUID& collectionUUID, long long delta) {
+ if (!_isInitialized.load() || delta == 0)
+ return;
+
+ stdx::lock_guard lk{_mutex};
+ if (delta > 0) {
+ // Increase or create the stats if missing
+ _collStatsMap[collectionUUID].numOrphanDocs += delta;
+ } else {
+ // Decrease orphan docs count
+ auto collStatsIt = _collStatsMap.find(collectionUUID);
+ if (collStatsIt == _collStatsMap.end()) {
+ // This should happen only in case of direct manipulation of range deletion tasks
+ // documents or direct writes into orphaned ranges
+ LOGV2_ERROR(6419610,
+ "Couldn't find cached orphan documents count during decrese attempt",
+ "collectionUUID"_attr = collectionUUID,
+ "delta"_attr = delta);
+ return;
+ }
+ auto& stats = collStatsIt->second;
+
+ stats.numOrphanDocs += delta;
+
+ if (stats.numOrphanDocs < 0) {
+ // This should happen only in case of direct manipulation of range deletion tasks
+ // documents or direct writes into orphaned ranges
+ LOGV2_ERROR(6419611,
+ "Cached orphan documents count became negative, resetting it to 0",
+ "collectionUUID"_attr = collectionUUID,
+ "numOrphanDocs"_attr = stats.numOrphanDocs,
+ "delta"_attr = delta,
+ "numRangeDeletionTasks"_attr = stats.numRangeDeletionTasks);
+ stats.numOrphanDocs = 0;
+ }
+ }
+}
+
+
+void BalancerStatsRegistry::_loadOrphansCount(OperationContext* opCtx) {
+ static constexpr auto kNumOrphanDocsLabel = "numOrphanDocs"_sd;
+ static constexpr auto kNumRangeDeletionTasksLabel = "numRangeDeletionTasks"_sd;
+
+ /*
+ * {
+ * $group: {
+ * _id: $collectionUUID,
+ * numOrphanDocs: {$sum: $numOrphanDocs},
+ * numRangeDeletionTasks: {$count: {}},
+ * }
+ * }
+ */
+ static const BSONObj groupStage{
+ BSON("$group" << BSON("_id"
+ << "$" + RangeDeletionTask::kCollectionUuidFieldName
+ << kNumOrphanDocsLabel
+ << BSON("$sum"
+ << "$" + RangeDeletionTask::kNumOrphanDocsFieldName)
+ << kNumRangeDeletionTasksLabel << BSON("$count" << BSONObj())))};
+ AggregateCommandRequest aggRequest{NamespaceString::kRangeDeletionNamespace, {groupStage}};
+
+ DBDirectClient client{opCtx};
+ auto cursor = uassertStatusOK(DBClientCursor::fromAggregationRequest(
+ &client, std::move(aggRequest), false /* secondaryOk */, true /* useExhaust */));
+
+ {
+ stdx::lock_guard lk{_mutex};
+ _collStatsMap.clear();
+ while (cursor->more()) {
+ auto collObj = cursor->next();
+ auto collUUID = uassertStatusOK(UUID::parse(collObj["_id"]));
+ auto orphanCount = collObj[kNumOrphanDocsLabel].exactNumberLong();
+ auto numRangeDeletionTasks = collObj[kNumRangeDeletionTasksLabel].exactNumberLong();
+ invariant(numRangeDeletionTasks > 0);
+ if (orphanCount < 0) {
+ LOGV2_ERROR(6419621,
+ "Found negative orphan count in range deletion task documents",
+ "collectionUUID"_attr = collUUID,
+ "numOrphanDocs"_attr = orphanCount,
+ "numRangeDeletionTasks"_attr = numRangeDeletionTasks);
+ orphanCount = 0;
+ }
+ _collStatsMap.emplace(collUUID, CollectionStats{orphanCount, numRangeDeletionTasks});
+ }
+ LOGV2_DEBUG(6419604,
+ 2,
+ "Populated BalancerStatsRegistry cache",
+ "numCollections"_attr = _collStatsMap.size());
+ }
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/s/balancer_stats_registry.h b/src/mongo/db/s/balancer_stats_registry.h
new file mode 100644
index 00000000000..f7a7d20dd58
--- /dev/null
+++ b/src/mongo/db/s/balancer_stats_registry.h
@@ -0,0 +1,115 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/operation_context.h"
+#include "mongo/db/repl/replica_set_aware_service.h"
+#include "mongo/util/concurrency/thread_pool.h"
+#include "mongo/util/uuid.h"
+
+namespace mongo {
+
+/**
+ * The BalancerStatsRegistry is used to cache metadata on shards, such as the orphan documents
+ * count. The blancer (on the config sever) periodically fetches this metadata through the
+ * _shardsvrGetStatsForBalancing command and uses it to evaluate balancing status of collections.
+ *
+ * The BalancerStatsRegistry is active only on replicaset primary nodes, it is initialized on stepUp
+ * and terminated on stepDown.
+ */
+
+class BalancerStatsRegistry : public ReplicaSetAwareServiceShardSvr<BalancerStatsRegistry> {
+
+ BalancerStatsRegistry(const BalancerStatsRegistry&) = delete;
+ BalancerStatsRegistry& operator=(const BalancerStatsRegistry&) = delete;
+
+public:
+ BalancerStatsRegistry() = default;
+
+ /**
+ * Obtains the service-wide instance.
+ */
+ static BalancerStatsRegistry* get(ServiceContext* serviceContext);
+ static BalancerStatsRegistry* get(OperationContext* opCtx);
+
+ /**
+ * Non blocking initialization. Performs an asyncronous initialization of this registry.
+ */
+ void initializeAsync(OperationContext* opCtx);
+
+ void terminate();
+
+ /**
+ * Update orphan document count for a specific collection.
+ * `delta` is the increment/decrement that will be applied to the current cached count.
+ *
+ * If the registy is not initialized this function will be a noop.
+ */
+ void updateOrphansCount(const UUID& collectionUUID, long long delta);
+ void onRangeDeletionTaskInsertion(const UUID& collectionUUID, long long numOrphanDocs);
+ void onRangeDeletionTaskDeletion(const UUID& collectionUUID, long long numOrphanDocs);
+
+ long long getCollNumOrphanDocs(const UUID& collectionUUID) const;
+
+private:
+ void onStartupRecoveryComplete(OperationContext* opCtx) override final {}
+ void onInitialSyncComplete(OperationContext* opCtx) override final {}
+ void onStepUpBegin(OperationContext* opCtx, long long term) override final {}
+ void onBecomeArbiter() override final {}
+ void onShutdown() override final {}
+
+ void onStartup(OperationContext* opCtx) override final;
+ void onStepUpComplete(OperationContext* opCtx, long long term) override final;
+ void onStepDown() override final;
+
+ void _initialize(OperationContext* opCtx);
+
+ void _loadOrphansCount(OperationContext* opCtx);
+
+ struct CollectionStats {
+ // Number of orphan documents for this collection
+ long long numOrphanDocs;
+ // Number of range deletion tasks
+ long long numRangeDeletionTasks;
+ };
+
+ // The registry could be only initialized when this node is replicaset primary.
+ // If the registry is not initialized this node could be either primary or secondary.
+ AtomicWord<bool> _isInitialized{false};
+
+ mutable Mutex _mutex = MONGO_MAKE_LATCH("BalancerStatsRegistry::_mutex");
+ // Map containing all the currently cached collection stats
+ stdx::unordered_map<UUID, CollectionStats, UUID::Hash> _collStatsMap;
+
+ // Thread pool used for asyncronous initialization
+ std::shared_ptr<ThreadPool> _threadPool;
+};
+
+} // namespace mongo
diff --git a/src/mongo/db/s/get_stats_for_balancing.idl b/src/mongo/db/s/get_stats_for_balancing.idl
new file mode 100644
index 00000000000..eac1207a213
--- /dev/null
+++ b/src/mongo/db/s/get_stats_for_balancing.idl
@@ -0,0 +1,72 @@
+# Copyright (C) 2022-present MongoDB, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the Server Side Public License, version 1,
+# as published by MongoDB, Inc.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Server Side Public License for more details.
+#
+# You should have received a copy of the Server Side Public License
+# along with this program. If not, see
+# <http://www.mongodb.com/licensing/server-side-public-license>.
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the Server Side Public License in all respects for
+# all of the code used other than as permitted herein. If you modify file(s)
+# with this exception, you may extend this exception to your version of the
+# file(s), but you are not obligated to do so. If you do not wish to do so,
+# delete this exception statement from your version. If you delete this
+# exception statement from all source files in the program, then also delete
+# it in the license file.
+#
+
+global:
+ cpp_namespace: "mongo"
+
+imports:
+ - "mongo/idl/basic_types.idl"
+
+structs:
+ CollStatsForBalancing:
+ description: 'Collection stats for a specific collection'
+ strict: false
+ fields:
+ namespace:
+ description: 'Namespace of the collection'
+ type: namespacestring
+ cpp_name: ns
+ collSize:
+ description: 'size of data currently owned by this shard for this collection'
+ type: safeInt64
+
+ ShardsvrGetStatsForBalancingReply:
+ description: 'Response for ShardsvrGetStatsForBalancing command'
+ strict: false
+ fields:
+ stats:
+ description: 'List of stats for each of the requested collection'
+ type: array<CollStatsForBalancing>
+
+commands:
+ _shardsvrGetStatsForBalancing:
+ command_name: _shardsvrGetStatsForBalancing
+ cpp_name: ShardsvrGetStatsForBalancing
+ description: 'Internal command used by the balancer to retrieve stats for balancing.'
+ namespace: ignored
+ api_version: ''
+ strict: false
+ reply_type: ShardsvrGetStatsForBalancingReply
+ fields:
+ collections:
+ description: 'List of namespaces to retrieve statistic for'
+ type: array<namespacestring>
+ scaleFactor:
+ description: 'Scale factor for data size units. If omitted 1048576 (MiB) will be used'
+ type: exactInt64
+ optional: true
diff --git a/src/mongo/db/s/migration_coordinator.cpp b/src/mongo/db/s/migration_coordinator.cpp
index fe96ea1accb..b6ff4b7abd1 100644
--- a/src/mongo/db/s/migration_coordinator.cpp
+++ b/src/mongo/db/s/migration_coordinator.cpp
@@ -240,10 +240,11 @@ SemiFuture<void> MigrationCoordinator::_commitMigrationOnDonorAndRecipient(
"Retrieving number of orphan documents from recipient",
"migrationId"_attr = _migrationInfo.getId());
- auto numOrphans = migrationutil::retrieveNumOrphansFromRecipient(opCtx, _migrationInfo);
+ const auto numOrphans = migrationutil::retrieveNumOrphansFromRecipient(opCtx, _migrationInfo);
if (numOrphans > 0) {
- migrationutil::persistUpdatedNumOrphans(opCtx, _migrationInfo.getId(), numOrphans);
+ migrationutil::persistUpdatedNumOrphans(
+ opCtx, _migrationInfo.getId(), _migrationInfo.getCollectionUuid(), numOrphans);
}
LOGV2_DEBUG(23896,
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index 1481b9d073e..cb9b554ff88 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -1133,6 +1133,8 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx,
return {uuid, indexes, idIndex, collOptions};
}();
+ _collectionUuid = donorCollectionOptionsAndIndexes.uuid;
+
auto fromShard = uassertStatusOK(
Grid::get(outerOpCtx)->shardRegistry()->getShard(outerOpCtx, _fromShard));
@@ -1327,7 +1329,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx,
}
migrationutil::persistUpdatedNumOrphans(
- opCtx, _migrationId.get(), batchNumCloned);
+ opCtx, _migrationId.get(), *_collectionUuid, batchNumCloned);
{
stdx::lock_guard<Latch> statsLock(_mutex);
@@ -1752,7 +1754,8 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const
}
if (changeInOrphans != 0) {
- migrationutil::persistUpdatedNumOrphans(opCtx, _migrationId.get(), changeInOrphans);
+ migrationutil::persistUpdatedNumOrphans(
+ opCtx, _migrationId.get(), *_collectionUuid, changeInOrphans);
}
return didAnything;
}
diff --git a/src/mongo/db/s/migration_destination_manager.h b/src/mongo/db/s/migration_destination_manager.h
index cf5bf351f83..c1421fc51ea 100644
--- a/src/mongo/db/s/migration_destination_manager.h
+++ b/src/mongo/db/s/migration_destination_manager.h
@@ -269,6 +269,7 @@ private:
stdx::thread _migrateThreadHandle;
boost::optional<UUID> _migrationId;
+ boost::optional<UUID> _collectionUuid;
LogicalSessionId _lsid;
TxnNumber _txnNumber{kUninitializedTxnNumber};
NamespaceString _nss;
diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp
index 697340cc150..bd5120a1a43 100644
--- a/src/mongo/db/s/migration_util.cpp
+++ b/src/mongo/db/s/migration_util.cpp
@@ -672,6 +672,7 @@ void persistRangeDeletionTaskLocally(OperationContext* opCtx,
void persistUpdatedNumOrphans(OperationContext* opCtx,
const UUID& migrationId,
+ const UUID& collectionUuid,
long long changeInOrphans) {
// TODO (SERVER-63819) Remove numOrphanDocsFieldName field from the query
// Add $exists to the query to ensure that on upgrade and downgrade, the numOrphanDocs field
@@ -690,6 +691,7 @@ void persistUpdatedNumOrphans(OperationContext* opCtx,
<< changeInOrphans)),
WriteConcerns::kLocalWriteConcern);
});
+ BalancerStatsRegistry::get(opCtx)->updateOrphansCount(collectionUuid, changeInOrphans);
} catch (const ExceptionFor<ErrorCodes::NoMatchingDocument>&) {
// When upgrading or downgrading, there may be no documents with the orphan count field.
}
diff --git a/src/mongo/db/s/migration_util.h b/src/mongo/db/s/migration_util.h
index 49de15b74f4..66e88d609d8 100644
--- a/src/mongo/db/s/migration_util.h
+++ b/src/mongo/db/s/migration_util.h
@@ -31,6 +31,7 @@
#include "mongo/db/persistent_task_store.h"
#include "mongo/db/repl/optime.h"
+#include "mongo/db/s/balancer_stats_registry.h"
#include "mongo/db/s/collection_metadata.h"
#include "mongo/db/s/migration_coordinator_document_gen.h"
#include "mongo/db/s/migration_recipient_recovery_document_gen.h"
@@ -149,6 +150,7 @@ void persistRangeDeletionTaskLocally(OperationContext* opCtx,
*/
void persistUpdatedNumOrphans(OperationContext* opCtx,
const UUID& migrationId,
+ const UUID& collectionUuid,
long long changeInOrphans);
/**
diff --git a/src/mongo/db/s/migration_util_test.cpp b/src/mongo/db/s/migration_util_test.cpp
index 355043931e5..24fb4d3567e 100644
--- a/src/mongo/db/s/migration_util_test.cpp
+++ b/src/mongo/db/s/migration_util_test.cpp
@@ -341,17 +341,17 @@ TEST_F(MigrationUtilsTest, TestInvalidUUID) {
TEST_F(MigrationUtilsTest, TestUpdateNumberOfOrphans) {
auto opCtx = operationContext();
- const auto uuid = UUID::gen();
+ const auto collectionUuid = UUID::gen();
PersistentTaskStore<RangeDeletionTask> store(NamespaceString::kRangeDeletionNamespace);
- auto rangeDeletionDoc = createDeletionTask(opCtx, kTestNss, uuid, 0, 10);
+ auto rangeDeletionDoc = createDeletionTask(opCtx, kTestNss, collectionUuid, 0, 10);
rangeDeletionDoc.setNumOrphanDocs(0);
store.add(opCtx, rangeDeletionDoc);
- migrationutil::persistUpdatedNumOrphans(opCtx, rangeDeletionDoc.getId(), 5);
+ migrationutil::persistUpdatedNumOrphans(opCtx, rangeDeletionDoc.getId(), collectionUuid, 5);
rangeDeletionDoc.setNumOrphanDocs(5);
ASSERT_EQ(store.count(opCtx, rangeDeletionDoc.toBSON().removeField("timestamp")), 1);
- migrationutil::persistUpdatedNumOrphans(opCtx, rangeDeletionDoc.getId(), -5);
+ migrationutil::persistUpdatedNumOrphans(opCtx, rangeDeletionDoc.getId(), collectionUuid, -5);
rangeDeletionDoc.setNumOrphanDocs(0);
ASSERT_EQ(store.count(opCtx, rangeDeletionDoc.toBSON().removeField("timestamp")), 1);
}
diff --git a/src/mongo/db/s/range_deletion_util.cpp b/src/mongo/db/s/range_deletion_util.cpp
index 5beae5cc6b6..99ef78ed220 100644
--- a/src/mongo/db/s/range_deletion_util.cpp
+++ b/src/mongo/db/s/range_deletion_util.cpp
@@ -325,7 +325,8 @@ ExecutorFuture<void> deleteRangeInBatches(const std::shared_ptr<executor::TaskEx
keyPattern,
range,
numDocsToRemovePerBatch));
- migrationutil::persistUpdatedNumOrphans(opCtx, migrationId, -numDeleted);
+ migrationutil::persistUpdatedNumOrphans(
+ opCtx, migrationId, collectionUuid, -numDeleted);
if (MONGO_unlikely(hangAfterDoingDeletion.shouldFail())) {
hangAfterDoingDeletion.pauseWhileSet(opCtx);
diff --git a/src/mongo/db/s/shard_server_op_observer.cpp b/src/mongo/db/s/shard_server_op_observer.cpp
index b4214be121c..228ef5ffbb9 100644
--- a/src/mongo/db/s/shard_server_op_observer.cpp
+++ b/src/mongo/db/s/shard_server_op_observer.cpp
@@ -36,6 +36,7 @@
#include "mongo/bson/util/bson_extract.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/op_observer_impl.h"
+#include "mongo/db/s/balancer_stats_registry.h"
#include "mongo/db/s/chunk_split_state_driver.h"
#include "mongo/db/s/chunk_splitter.h"
#include "mongo/db/s/collection_critical_section_document_gen.h"
@@ -57,6 +58,7 @@
#include "mongo/s/cannot_implicitly_create_collection_info.h"
#include "mongo/s/catalog_cache_loader.h"
#include "mongo/s/grid.h"
+#include "mongo/s/sharding_feature_flags_gen.h"
namespace mongo {
namespace {
@@ -268,6 +270,10 @@ void ShardServerOpObserver::onInserts(OperationContext* opCtx,
opCtx->recoveryUnit()->registerChange(
std::make_unique<SubmitRangeDeletionHandler>(opCtx, deletionTask));
}
+
+ const auto numOrphanDocs = deletionTask.getNumOrphanDocs().value_or(0);
+ BalancerStatsRegistry::get(opCtx)->onRangeDeletionTaskInsertion(
+ deletionTask.getCollectionUuid(), numOrphanDocs);
}
if (nss == NamespaceString::kCollectionCriticalSectionsNamespace &&
@@ -446,7 +452,8 @@ void ShardServerOpObserver::aboutToDelete(OperationContext* opCtx,
const UUID& uuid,
BSONObj const& doc) {
- if (nss == NamespaceString::kCollectionCriticalSectionsNamespace) {
+ if (nss == NamespaceString::kCollectionCriticalSectionsNamespace ||
+ nss == NamespaceString::kRangeDeletionNamespace) {
documentIdDecoration(opCtx) = doc;
} else {
// Extract the _id field from the document. If it does not have an _id, use the
@@ -527,6 +534,33 @@ void ShardServerOpObserver::onDelete(OperationContext* opCtx,
csr->exitCriticalSection(csrLock, reason);
});
}
+
+ if (nss == NamespaceString::kRangeDeletionNamespace) {
+ if (!feature_flags::gOrphanTracking.isEnabled(serverGlobalParams.featureCompatibility)) {
+ return;
+ }
+
+ const auto& deletedDoc = documentId;
+
+ const auto numOrphanDocs = [&] {
+ auto numOrphanDocsElem = update_oplog_entry::extractNewValueForField(
+ deletedDoc, RangeDeletionTask::kNumOrphanDocsFieldName);
+ return numOrphanDocsElem ? numOrphanDocsElem.exactNumberLong() : 0;
+ }();
+
+ auto collUuid = [&] {
+ BSONElement collUuidElem;
+ uassertStatusOK(bsonExtractField(
+ documentId, RangeDeletionTask::kCollectionUuidFieldName, &collUuidElem));
+ return uassertStatusOK(UUID::parse(std::move(collUuidElem)));
+ }();
+
+ opCtx->recoveryUnit()->onCommit([opCtx = opCtx,
+ collUuid = std::move(collUuid),
+ numOrphanDocs](boost::optional<Timestamp>) {
+ BalancerStatsRegistry::get(opCtx)->onRangeDeletionTaskDeletion(collUuid, numOrphanDocs);
+ });
+ }
}
void ShardServerOpObserver::onCreateCollection(OperationContext* opCtx,
diff --git a/src/mongo/db/s/shardsvr_get_stats_for_balancing_command.cpp b/src/mongo/db/s/shardsvr_get_stats_for_balancing_command.cpp
new file mode 100644
index 00000000000..743e895737f
--- /dev/null
+++ b/src/mongo/db/s/shardsvr_get_stats_for_balancing_command.cpp
@@ -0,0 +1,145 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
+
+#include "mongo/db/auth/authorization_session.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/db_raii.h"
+#include "mongo/db/s/balancer_stats_registry.h"
+#include "mongo/db/s/get_stats_for_balancing_gen.h"
+#include "mongo/db/s/sharding_state.h"
+#include "mongo/logv2/log.h"
+#include "mongo/s/grid.h"
+#include "mongo/s/sharding_feature_flags_gen.h"
+
+namespace mongo {
+namespace {
+
+class ShardsvrGetStatsForBalancingCmd final : public TypedCommand<ShardsvrGetStatsForBalancingCmd> {
+public:
+ using Request = ShardsvrGetStatsForBalancing;
+ using Reply = ShardsvrGetStatsForBalancingReply;
+
+ // Default scale factor for data size (MiB)
+ static constexpr int kDefaultScaleFactorMB{1024 * 1024};
+
+ bool skipApiVersionCheck() const override {
+ // Internal command (config -> shard).
+ return true;
+ }
+
+ std::string help() const override {
+ return "Internal command invoked by the config server to retrieve statistics from shard "
+ "used for balancing";
+ }
+
+ AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
+ return AllowedOnSecondary::kNever;
+ }
+
+ bool adminOnly() const override {
+ return true;
+ }
+
+ class Invocation final : public InvocationBase {
+ public:
+ using InvocationBase::InvocationBase;
+
+ Reply typedRun(OperationContext* opCtx) {
+ uassertStatusOK(ShardingState::get(opCtx)->canAcceptShardedCommands());
+ opCtx->setAlwaysInterruptAtStepDownOrUp();
+
+ uassert(ErrorCodes::InvalidOptions,
+ "At least one collection must be specified",
+ request().getCollections().size());
+
+ const auto scaleFactor = request().getScaleFactor().get_value_or(kDefaultScaleFactorMB);
+ std::vector<CollStatsForBalancing> collStats;
+ collStats.reserve(request().getCollections().size());
+
+ for (const auto& ns : request().getCollections()) {
+ const auto collDataSizeScaled =
+ static_cast<long long>(_getCollDataSizeBytes(opCtx, ns) / scaleFactor);
+ collStats.emplace_back(ns, collDataSizeScaled);
+ }
+ return {std::move(collStats)};
+ }
+
+ private:
+ long long _getCollDataSizeBytes(OperationContext* opCtx, const NamespaceString& ns) const {
+ boost::optional<UUID> collUUID;
+ long long numRecords{0};
+ long long dataSizeBytes{0};
+
+ if (AutoGetCollectionForReadCommandMaybeLockFree autoColl{opCtx, ns}) {
+ collUUID = autoColl->uuid();
+ numRecords = autoColl->numRecords(opCtx);
+ dataSizeBytes = autoColl->dataSize(opCtx);
+ }
+
+ // If the collection doesn't exists or is empty return 0
+ if (dataSizeBytes == 0) {
+ return 0LL;
+ }
+
+ const long long numOrphanDocs = [&] {
+ if (!feature_flags::gOrphanTracking.isEnabled(
+ serverGlobalParams.featureCompatibility)) {
+ return 0LL;
+ }
+ return BalancerStatsRegistry::get(opCtx)->getCollNumOrphanDocs(*collUUID);
+ }();
+
+ invariant(numRecords >= numOrphanDocs);
+
+ const auto avgObjSizeBytes = static_cast<long long>(dataSizeBytes / numRecords);
+ return avgObjSizeBytes * (numRecords - numOrphanDocs);
+ }
+
+ NamespaceString ns() const override {
+ return {request().getDbName(), ""};
+ }
+
+ bool supportsWriteConcern() const override {
+ return false;
+ }
+
+ void doCheckAuthorization(OperationContext* opCtx) const override {
+ uassert(ErrorCodes::Unauthorized,
+ "Unauthorized",
+ AuthorizationSession::get(opCtx->getClient())
+ ->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(),
+ ActionType::internal));
+ }
+ };
+} _shardsvrGetStatsForBalancingCmd;
+
+} // namespace
+} // namespace mongo