diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/catalog/collection.h | 6 | ||||
-rw-r--r-- | src/mongo/db/commands/set_feature_compatibility_version_command.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/s/SConscript | 3 | ||||
-rw-r--r-- | src/mongo/db/s/balancer_stats_registry.cpp | 286 | ||||
-rw-r--r-- | src/mongo/db/s/balancer_stats_registry.h | 115 | ||||
-rw-r--r-- | src/mongo/db/s/get_stats_for_balancing.idl | 72 | ||||
-rw-r--r-- | src/mongo/db/s/migration_coordinator.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/s/migration_destination_manager.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/s/migration_destination_manager.h | 1 | ||||
-rw-r--r-- | src/mongo/db/s/migration_util.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/s/migration_util.h | 2 | ||||
-rw-r--r-- | src/mongo/db/s/migration_util_test.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/s/range_deletion_util.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/s/shard_server_op_observer.cpp | 36 | ||||
-rw-r--r-- | src/mongo/db/s/shardsvr_get_stats_for_balancing_command.cpp | 145 |
15 files changed, 687 insertions, 14 deletions
diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h index 47384e3d346..f4799a02f81 100644 --- a/src/mongo/db/catalog/collection.h +++ b/src/mongo/db/catalog/collection.h @@ -739,6 +739,9 @@ public: virtual long long numRecords(OperationContext* opCtx) const = 0; + /** + * Return uncompressed collection data size in bytes + */ virtual long long dataSize(OperationContext* opCtx) const = 0; @@ -747,6 +750,9 @@ public: */ virtual bool isEmpty(OperationContext* opCtx) const = 0; + /** + * Return the average object size in bytes + */ virtual int averageObjectSize(OperationContext* opCtx) const = 0; virtual uint64_t getIndexSize(OperationContext* opCtx, diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp index 0212a664965..6c20e3ae89c 100644 --- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp +++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp @@ -394,9 +394,9 @@ public: if (request.getPhase() == SetFCVPhaseEnum::kStart) { invariant(serverGlobalParams.clusterRole == ClusterRole::ShardServer); - // TODO SERVER-64162 Destroy the BalancerStatsRegistry if (actualVersion > requestedVersion && !feature_flags::gOrphanTracking.isEnabledOnVersion(requestedVersion)) { + BalancerStatsRegistry::get(opCtx)->terminate(); ScopedRangeDeleterLock rangeDeleterLock(opCtx); clearOrphanCountersFromRangeDeletionTasks(opCtx); } @@ -466,9 +466,11 @@ public: migrationutil::drainMigrationsPendingRecovery(opCtx); if (orphanTrackingCondition) { - // TODO SERVER-64162 Initialize the BalancerStatsRegistry - ScopedRangeDeleterLock rangeDeleterLock(opCtx); - setOrphanCountersOnRangeDeletionTasks(opCtx); + { + ScopedRangeDeleterLock rangeDeleterLock(opCtx); + setOrphanCountersOnRangeDeletionTasks(opCtx); + } + BalancerStatsRegistry::get(opCtx)->initializeAsync(opCtx); } } diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript index 96f60ade98d..c407df26173 100644 --- a/src/mongo/db/s/SConscript +++ b/src/mongo/db/s/SConscript @@ -47,6 +47,7 @@ env.Library( 'auto_split_vector.cpp', 'chunk_move_write_concern_options.cpp', 'chunk_split_state_driver.cpp', + 'balancer_stats_registry.cpp', 'chunk_splitter.cpp', 'collection_critical_section_document.idl', 'collection_sharding_runtime.cpp', @@ -370,6 +371,7 @@ env.Library( 'flush_routing_table_cache_updates_command.cpp', 'get_database_version_command.cpp', 'get_shard_version_command.cpp', + 'get_stats_for_balancing.idl', 'merge_chunks_command.cpp', 'migration_chunk_cloner_source_legacy_commands.cpp', 'migration_destination_manager_legacy_commands.cpp', @@ -409,6 +411,7 @@ env.Library( 'shardsvr_drop_collection_participant_command.cpp', 'shardsvr_drop_database_command.cpp', 'shardsvr_drop_database_participant_command.cpp', + 'shardsvr_get_stats_for_balancing_command.cpp', 'shardsvr_move_primary_command.cpp', 'shardsvr_move_range_command.cpp', 'shardsvr_participant_block_command.cpp', diff --git a/src/mongo/db/s/balancer_stats_registry.cpp b/src/mongo/db/s/balancer_stats_registry.cpp new file mode 100644 index 00000000000..f96e5e3b3bb --- /dev/null +++ b/src/mongo/db/s/balancer_stats_registry.cpp @@ -0,0 +1,286 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding + +#include "mongo/db/s/balancer_stats_registry.h" + +#include "mongo/db/dbdirectclient.h" +#include "mongo/db/pipeline/aggregate_command_gen.h" +#include "mongo/db/repl/replication_coordinator.h" +#include "mongo/db/s/range_deletion_task_gen.h" +#include "mongo/db/s/range_deletion_util.h" +#include "mongo/db/service_context.h" +#include "mongo/logv2/log.h" +#include "mongo/s/sharding_feature_flags_gen.h" + +namespace mongo { +namespace { + +const auto balancerStatsRegistryDecorator = + ServiceContext::declareDecoration<BalancerStatsRegistry>(); + +/** + * Constructs the default options for the private thread pool used for asyncronous initialization + */ +ThreadPool::Options makeDefaultThreadPoolOptions() { + ThreadPool::Options options; + options.poolName = "BalancerStatsRegistry"; + options.minThreads = 0; + options.maxThreads = 1; + + // Ensure all threads have a client + options.onCreateThread = [](const std::string& threadName) { + Client::initThread(threadName.c_str()); + stdx::lock_guard<Client> lk{cc()}; + cc().setSystemOperationKillableByStepdown(lk); + }; + + return options; +} +} // namespace + +const ReplicaSetAwareServiceRegistry::Registerer<BalancerStatsRegistry> + balancerStatsRegistryRegisterer("BalancerStatsRegistry"); + + +BalancerStatsRegistry* BalancerStatsRegistry::get(ServiceContext* serviceContext) { + return &balancerStatsRegistryDecorator(serviceContext); +} + +BalancerStatsRegistry* BalancerStatsRegistry::get(OperationContext* opCtx) { + return get(opCtx->getServiceContext()); +} + +void BalancerStatsRegistry::onStartup(OperationContext* opCtx) { + _threadPool = std::make_shared<ThreadPool>(makeDefaultThreadPoolOptions()); + _threadPool->startup(); +} + +void BalancerStatsRegistry::onStepUpComplete(OperationContext* opCtx, long long term) { + if (!feature_flags::gOrphanTracking.isEnabled(serverGlobalParams.featureCompatibility)) { + // If future flag is disabled do not initialize the registry + return; + } + initializeAsync(opCtx); +} + +void BalancerStatsRegistry::initializeAsync(OperationContext* opCtx) { + ExecutorFuture<void>(_threadPool) + .then([this] { + // All threads in this pool + auto opCtxHolder{cc().makeOperationContext()}; + opCtxHolder->setAlwaysInterruptAtStepDownOrUp(); + auto opCtx{opCtxHolder.get()}; + + if (!repl::ReplicationCoordinator::get(opCtx)->getMemberState().primary()) { + return; + } + + LOGV2_DEBUG(6419601, 2, "Initializing BalancerStatsRegistry"); + try { + // Lock the range deleter to prevent + // concurrent modifications of orphans count + ScopedRangeDeleterLock rangeDeleterLock(opCtx); + // Load current ophans count from disk + _loadOrphansCount(opCtx); + LOGV2_DEBUG(6419602, 2, "Completed BalancerStatsRegistry initialization"); + // Start accepting updates to the cached orphan count + _isInitialized.store(true); + // Unlock the range deleter (~ScopedRangeDeleterLock) + } catch (const DBException& ex) { + LOGV2_ERROR(6419600, + "Failed to initialize BalancerStatsRegistry after stepUp", + "error"_attr = redact(ex)); + } + }) + .getAsync([](auto) {}); +} + +void BalancerStatsRegistry::terminate() { + // Wait for the asyncronous initialization to complete + _threadPool->waitForIdle(); + // Prevent future usages until next stepup + _isInitialized.store(false); + // Clear the stats + stdx::lock_guard lk{_mutex}; + _collStatsMap.clear(); + LOGV2_DEBUG(6419603, 2, "BalancerStatsRegistry terminated"); +} + +void BalancerStatsRegistry::onStepDown() { + terminate(); +} + +long long BalancerStatsRegistry::getCollNumOrphanDocs(const UUID& collectionUUID) const { + if (!_isInitialized.load()) + uasserted(ErrorCodes::NotYetInitialized, "BalancerStatsRegistry is not initialized"); + + stdx::lock_guard lk{_mutex}; + auto collStats = _collStatsMap.find(collectionUUID); + if (collStats != _collStatsMap.end()) { + return collStats->second.numOrphanDocs; + } + return 0; +} + +void BalancerStatsRegistry::onRangeDeletionTaskInsertion(const UUID& collectionUUID, + long long numOrphanDocs) { + if (!_isInitialized.load()) + return; + + stdx::lock_guard lk{_mutex}; + auto& stats = _collStatsMap[collectionUUID]; + stats.numRangeDeletionTasks += 1; + stats.numOrphanDocs += numOrphanDocs; +} + +void BalancerStatsRegistry::onRangeDeletionTaskDeletion(const UUID& collectionUUID, + long long numOrphanDocs) { + if (!_isInitialized.load()) + return; + + stdx::lock_guard lk{_mutex}; + auto collStatsIt = _collStatsMap.find(collectionUUID); + if (collStatsIt == _collStatsMap.end()) { + LOGV2_ERROR(6419612, + "Couldn't find cached range deletion tasks count during decrese attempt", + "collectionUUID"_attr = collectionUUID, + "numOrphanDocs"_attr = numOrphanDocs); + return; + } + auto& stats = collStatsIt->second; + + stats.numRangeDeletionTasks -= 1; + stats.numOrphanDocs -= numOrphanDocs; + + if (stats.numRangeDeletionTasks <= 0) { + if (MONGO_unlikely(stats.numRangeDeletionTasks < 0)) { + LOGV2_ERROR(6419613, + "Cached count of range deletion tasks became negative. Resetting it to 0", + "collectionUUID"_attr = collectionUUID, + "numRangeDeletionTasks"_attr = stats.numRangeDeletionTasks, + "numOrphanDocs"_attr = stats.numRangeDeletionTasks); + } + _collStatsMap.erase(collStatsIt); + } +} + +void BalancerStatsRegistry::updateOrphansCount(const UUID& collectionUUID, long long delta) { + if (!_isInitialized.load() || delta == 0) + return; + + stdx::lock_guard lk{_mutex}; + if (delta > 0) { + // Increase or create the stats if missing + _collStatsMap[collectionUUID].numOrphanDocs += delta; + } else { + // Decrease orphan docs count + auto collStatsIt = _collStatsMap.find(collectionUUID); + if (collStatsIt == _collStatsMap.end()) { + // This should happen only in case of direct manipulation of range deletion tasks + // documents or direct writes into orphaned ranges + LOGV2_ERROR(6419610, + "Couldn't find cached orphan documents count during decrese attempt", + "collectionUUID"_attr = collectionUUID, + "delta"_attr = delta); + return; + } + auto& stats = collStatsIt->second; + + stats.numOrphanDocs += delta; + + if (stats.numOrphanDocs < 0) { + // This should happen only in case of direct manipulation of range deletion tasks + // documents or direct writes into orphaned ranges + LOGV2_ERROR(6419611, + "Cached orphan documents count became negative, resetting it to 0", + "collectionUUID"_attr = collectionUUID, + "numOrphanDocs"_attr = stats.numOrphanDocs, + "delta"_attr = delta, + "numRangeDeletionTasks"_attr = stats.numRangeDeletionTasks); + stats.numOrphanDocs = 0; + } + } +} + + +void BalancerStatsRegistry::_loadOrphansCount(OperationContext* opCtx) { + static constexpr auto kNumOrphanDocsLabel = "numOrphanDocs"_sd; + static constexpr auto kNumRangeDeletionTasksLabel = "numRangeDeletionTasks"_sd; + + /* + * { + * $group: { + * _id: $collectionUUID, + * numOrphanDocs: {$sum: $numOrphanDocs}, + * numRangeDeletionTasks: {$count: {}}, + * } + * } + */ + static const BSONObj groupStage{ + BSON("$group" << BSON("_id" + << "$" + RangeDeletionTask::kCollectionUuidFieldName + << kNumOrphanDocsLabel + << BSON("$sum" + << "$" + RangeDeletionTask::kNumOrphanDocsFieldName) + << kNumRangeDeletionTasksLabel << BSON("$count" << BSONObj())))}; + AggregateCommandRequest aggRequest{NamespaceString::kRangeDeletionNamespace, {groupStage}}; + + DBDirectClient client{opCtx}; + auto cursor = uassertStatusOK(DBClientCursor::fromAggregationRequest( + &client, std::move(aggRequest), false /* secondaryOk */, true /* useExhaust */)); + + { + stdx::lock_guard lk{_mutex}; + _collStatsMap.clear(); + while (cursor->more()) { + auto collObj = cursor->next(); + auto collUUID = uassertStatusOK(UUID::parse(collObj["_id"])); + auto orphanCount = collObj[kNumOrphanDocsLabel].exactNumberLong(); + auto numRangeDeletionTasks = collObj[kNumRangeDeletionTasksLabel].exactNumberLong(); + invariant(numRangeDeletionTasks > 0); + if (orphanCount < 0) { + LOGV2_ERROR(6419621, + "Found negative orphan count in range deletion task documents", + "collectionUUID"_attr = collUUID, + "numOrphanDocs"_attr = orphanCount, + "numRangeDeletionTasks"_attr = numRangeDeletionTasks); + orphanCount = 0; + } + _collStatsMap.emplace(collUUID, CollectionStats{orphanCount, numRangeDeletionTasks}); + } + LOGV2_DEBUG(6419604, + 2, + "Populated BalancerStatsRegistry cache", + "numCollections"_attr = _collStatsMap.size()); + } +} + +} // namespace mongo diff --git a/src/mongo/db/s/balancer_stats_registry.h b/src/mongo/db/s/balancer_stats_registry.h new file mode 100644 index 00000000000..f7a7d20dd58 --- /dev/null +++ b/src/mongo/db/s/balancer_stats_registry.h @@ -0,0 +1,115 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/operation_context.h" +#include "mongo/db/repl/replica_set_aware_service.h" +#include "mongo/util/concurrency/thread_pool.h" +#include "mongo/util/uuid.h" + +namespace mongo { + +/** + * The BalancerStatsRegistry is used to cache metadata on shards, such as the orphan documents + * count. The blancer (on the config sever) periodically fetches this metadata through the + * _shardsvrGetStatsForBalancing command and uses it to evaluate balancing status of collections. + * + * The BalancerStatsRegistry is active only on replicaset primary nodes, it is initialized on stepUp + * and terminated on stepDown. + */ + +class BalancerStatsRegistry : public ReplicaSetAwareServiceShardSvr<BalancerStatsRegistry> { + + BalancerStatsRegistry(const BalancerStatsRegistry&) = delete; + BalancerStatsRegistry& operator=(const BalancerStatsRegistry&) = delete; + +public: + BalancerStatsRegistry() = default; + + /** + * Obtains the service-wide instance. + */ + static BalancerStatsRegistry* get(ServiceContext* serviceContext); + static BalancerStatsRegistry* get(OperationContext* opCtx); + + /** + * Non blocking initialization. Performs an asyncronous initialization of this registry. + */ + void initializeAsync(OperationContext* opCtx); + + void terminate(); + + /** + * Update orphan document count for a specific collection. + * `delta` is the increment/decrement that will be applied to the current cached count. + * + * If the registy is not initialized this function will be a noop. + */ + void updateOrphansCount(const UUID& collectionUUID, long long delta); + void onRangeDeletionTaskInsertion(const UUID& collectionUUID, long long numOrphanDocs); + void onRangeDeletionTaskDeletion(const UUID& collectionUUID, long long numOrphanDocs); + + long long getCollNumOrphanDocs(const UUID& collectionUUID) const; + +private: + void onStartupRecoveryComplete(OperationContext* opCtx) override final {} + void onInitialSyncComplete(OperationContext* opCtx) override final {} + void onStepUpBegin(OperationContext* opCtx, long long term) override final {} + void onBecomeArbiter() override final {} + void onShutdown() override final {} + + void onStartup(OperationContext* opCtx) override final; + void onStepUpComplete(OperationContext* opCtx, long long term) override final; + void onStepDown() override final; + + void _initialize(OperationContext* opCtx); + + void _loadOrphansCount(OperationContext* opCtx); + + struct CollectionStats { + // Number of orphan documents for this collection + long long numOrphanDocs; + // Number of range deletion tasks + long long numRangeDeletionTasks; + }; + + // The registry could be only initialized when this node is replicaset primary. + // If the registry is not initialized this node could be either primary or secondary. + AtomicWord<bool> _isInitialized{false}; + + mutable Mutex _mutex = MONGO_MAKE_LATCH("BalancerStatsRegistry::_mutex"); + // Map containing all the currently cached collection stats + stdx::unordered_map<UUID, CollectionStats, UUID::Hash> _collStatsMap; + + // Thread pool used for asyncronous initialization + std::shared_ptr<ThreadPool> _threadPool; +}; + +} // namespace mongo diff --git a/src/mongo/db/s/get_stats_for_balancing.idl b/src/mongo/db/s/get_stats_for_balancing.idl new file mode 100644 index 00000000000..eac1207a213 --- /dev/null +++ b/src/mongo/db/s/get_stats_for_balancing.idl @@ -0,0 +1,72 @@ +# Copyright (C) 2022-present MongoDB, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the Server Side Public License, version 1, +# as published by MongoDB, Inc. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Server Side Public License for more details. +# +# You should have received a copy of the Server Side Public License +# along with this program. If not, see +# <http://www.mongodb.com/licensing/server-side-public-license>. +# +# As a special exception, the copyright holders give permission to link the +# code of portions of this program with the OpenSSL library under certain +# conditions as described in each individual source file and distribute +# linked combinations including the program with the OpenSSL library. You +# must comply with the Server Side Public License in all respects for +# all of the code used other than as permitted herein. If you modify file(s) +# with this exception, you may extend this exception to your version of the +# file(s), but you are not obligated to do so. If you do not wish to do so, +# delete this exception statement from your version. If you delete this +# exception statement from all source files in the program, then also delete +# it in the license file. +# + +global: + cpp_namespace: "mongo" + +imports: + - "mongo/idl/basic_types.idl" + +structs: + CollStatsForBalancing: + description: 'Collection stats for a specific collection' + strict: false + fields: + namespace: + description: 'Namespace of the collection' + type: namespacestring + cpp_name: ns + collSize: + description: 'size of data currently owned by this shard for this collection' + type: safeInt64 + + ShardsvrGetStatsForBalancingReply: + description: 'Response for ShardsvrGetStatsForBalancing command' + strict: false + fields: + stats: + description: 'List of stats for each of the requested collection' + type: array<CollStatsForBalancing> + +commands: + _shardsvrGetStatsForBalancing: + command_name: _shardsvrGetStatsForBalancing + cpp_name: ShardsvrGetStatsForBalancing + description: 'Internal command used by the balancer to retrieve stats for balancing.' + namespace: ignored + api_version: '' + strict: false + reply_type: ShardsvrGetStatsForBalancingReply + fields: + collections: + description: 'List of namespaces to retrieve statistic for' + type: array<namespacestring> + scaleFactor: + description: 'Scale factor for data size units. If omitted 1048576 (MiB) will be used' + type: exactInt64 + optional: true diff --git a/src/mongo/db/s/migration_coordinator.cpp b/src/mongo/db/s/migration_coordinator.cpp index fe96ea1accb..b6ff4b7abd1 100644 --- a/src/mongo/db/s/migration_coordinator.cpp +++ b/src/mongo/db/s/migration_coordinator.cpp @@ -240,10 +240,11 @@ SemiFuture<void> MigrationCoordinator::_commitMigrationOnDonorAndRecipient( "Retrieving number of orphan documents from recipient", "migrationId"_attr = _migrationInfo.getId()); - auto numOrphans = migrationutil::retrieveNumOrphansFromRecipient(opCtx, _migrationInfo); + const auto numOrphans = migrationutil::retrieveNumOrphansFromRecipient(opCtx, _migrationInfo); if (numOrphans > 0) { - migrationutil::persistUpdatedNumOrphans(opCtx, _migrationInfo.getId(), numOrphans); + migrationutil::persistUpdatedNumOrphans( + opCtx, _migrationInfo.getId(), _migrationInfo.getCollectionUuid(), numOrphans); } LOGV2_DEBUG(23896, diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index 1481b9d073e..cb9b554ff88 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -1133,6 +1133,8 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, return {uuid, indexes, idIndex, collOptions}; }(); + _collectionUuid = donorCollectionOptionsAndIndexes.uuid; + auto fromShard = uassertStatusOK( Grid::get(outerOpCtx)->shardRegistry()->getShard(outerOpCtx, _fromShard)); @@ -1327,7 +1329,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, } migrationutil::persistUpdatedNumOrphans( - opCtx, _migrationId.get(), batchNumCloned); + opCtx, _migrationId.get(), *_collectionUuid, batchNumCloned); { stdx::lock_guard<Latch> statsLock(_mutex); @@ -1752,7 +1754,8 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const } if (changeInOrphans != 0) { - migrationutil::persistUpdatedNumOrphans(opCtx, _migrationId.get(), changeInOrphans); + migrationutil::persistUpdatedNumOrphans( + opCtx, _migrationId.get(), *_collectionUuid, changeInOrphans); } return didAnything; } diff --git a/src/mongo/db/s/migration_destination_manager.h b/src/mongo/db/s/migration_destination_manager.h index cf5bf351f83..c1421fc51ea 100644 --- a/src/mongo/db/s/migration_destination_manager.h +++ b/src/mongo/db/s/migration_destination_manager.h @@ -269,6 +269,7 @@ private: stdx::thread _migrateThreadHandle; boost::optional<UUID> _migrationId; + boost::optional<UUID> _collectionUuid; LogicalSessionId _lsid; TxnNumber _txnNumber{kUninitializedTxnNumber}; NamespaceString _nss; diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp index 697340cc150..bd5120a1a43 100644 --- a/src/mongo/db/s/migration_util.cpp +++ b/src/mongo/db/s/migration_util.cpp @@ -672,6 +672,7 @@ void persistRangeDeletionTaskLocally(OperationContext* opCtx, void persistUpdatedNumOrphans(OperationContext* opCtx, const UUID& migrationId, + const UUID& collectionUuid, long long changeInOrphans) { // TODO (SERVER-63819) Remove numOrphanDocsFieldName field from the query // Add $exists to the query to ensure that on upgrade and downgrade, the numOrphanDocs field @@ -690,6 +691,7 @@ void persistUpdatedNumOrphans(OperationContext* opCtx, << changeInOrphans)), WriteConcerns::kLocalWriteConcern); }); + BalancerStatsRegistry::get(opCtx)->updateOrphansCount(collectionUuid, changeInOrphans); } catch (const ExceptionFor<ErrorCodes::NoMatchingDocument>&) { // When upgrading or downgrading, there may be no documents with the orphan count field. } diff --git a/src/mongo/db/s/migration_util.h b/src/mongo/db/s/migration_util.h index 49de15b74f4..66e88d609d8 100644 --- a/src/mongo/db/s/migration_util.h +++ b/src/mongo/db/s/migration_util.h @@ -31,6 +31,7 @@ #include "mongo/db/persistent_task_store.h" #include "mongo/db/repl/optime.h" +#include "mongo/db/s/balancer_stats_registry.h" #include "mongo/db/s/collection_metadata.h" #include "mongo/db/s/migration_coordinator_document_gen.h" #include "mongo/db/s/migration_recipient_recovery_document_gen.h" @@ -149,6 +150,7 @@ void persistRangeDeletionTaskLocally(OperationContext* opCtx, */ void persistUpdatedNumOrphans(OperationContext* opCtx, const UUID& migrationId, + const UUID& collectionUuid, long long changeInOrphans); /** diff --git a/src/mongo/db/s/migration_util_test.cpp b/src/mongo/db/s/migration_util_test.cpp index 355043931e5..24fb4d3567e 100644 --- a/src/mongo/db/s/migration_util_test.cpp +++ b/src/mongo/db/s/migration_util_test.cpp @@ -341,17 +341,17 @@ TEST_F(MigrationUtilsTest, TestInvalidUUID) { TEST_F(MigrationUtilsTest, TestUpdateNumberOfOrphans) { auto opCtx = operationContext(); - const auto uuid = UUID::gen(); + const auto collectionUuid = UUID::gen(); PersistentTaskStore<RangeDeletionTask> store(NamespaceString::kRangeDeletionNamespace); - auto rangeDeletionDoc = createDeletionTask(opCtx, kTestNss, uuid, 0, 10); + auto rangeDeletionDoc = createDeletionTask(opCtx, kTestNss, collectionUuid, 0, 10); rangeDeletionDoc.setNumOrphanDocs(0); store.add(opCtx, rangeDeletionDoc); - migrationutil::persistUpdatedNumOrphans(opCtx, rangeDeletionDoc.getId(), 5); + migrationutil::persistUpdatedNumOrphans(opCtx, rangeDeletionDoc.getId(), collectionUuid, 5); rangeDeletionDoc.setNumOrphanDocs(5); ASSERT_EQ(store.count(opCtx, rangeDeletionDoc.toBSON().removeField("timestamp")), 1); - migrationutil::persistUpdatedNumOrphans(opCtx, rangeDeletionDoc.getId(), -5); + migrationutil::persistUpdatedNumOrphans(opCtx, rangeDeletionDoc.getId(), collectionUuid, -5); rangeDeletionDoc.setNumOrphanDocs(0); ASSERT_EQ(store.count(opCtx, rangeDeletionDoc.toBSON().removeField("timestamp")), 1); } diff --git a/src/mongo/db/s/range_deletion_util.cpp b/src/mongo/db/s/range_deletion_util.cpp index 5beae5cc6b6..99ef78ed220 100644 --- a/src/mongo/db/s/range_deletion_util.cpp +++ b/src/mongo/db/s/range_deletion_util.cpp @@ -325,7 +325,8 @@ ExecutorFuture<void> deleteRangeInBatches(const std::shared_ptr<executor::TaskEx keyPattern, range, numDocsToRemovePerBatch)); - migrationutil::persistUpdatedNumOrphans(opCtx, migrationId, -numDeleted); + migrationutil::persistUpdatedNumOrphans( + opCtx, migrationId, collectionUuid, -numDeleted); if (MONGO_unlikely(hangAfterDoingDeletion.shouldFail())) { hangAfterDoingDeletion.pauseWhileSet(opCtx); diff --git a/src/mongo/db/s/shard_server_op_observer.cpp b/src/mongo/db/s/shard_server_op_observer.cpp index b4214be121c..228ef5ffbb9 100644 --- a/src/mongo/db/s/shard_server_op_observer.cpp +++ b/src/mongo/db/s/shard_server_op_observer.cpp @@ -36,6 +36,7 @@ #include "mongo/bson/util/bson_extract.h" #include "mongo/db/catalog_raii.h" #include "mongo/db/op_observer_impl.h" +#include "mongo/db/s/balancer_stats_registry.h" #include "mongo/db/s/chunk_split_state_driver.h" #include "mongo/db/s/chunk_splitter.h" #include "mongo/db/s/collection_critical_section_document_gen.h" @@ -57,6 +58,7 @@ #include "mongo/s/cannot_implicitly_create_collection_info.h" #include "mongo/s/catalog_cache_loader.h" #include "mongo/s/grid.h" +#include "mongo/s/sharding_feature_flags_gen.h" namespace mongo { namespace { @@ -268,6 +270,10 @@ void ShardServerOpObserver::onInserts(OperationContext* opCtx, opCtx->recoveryUnit()->registerChange( std::make_unique<SubmitRangeDeletionHandler>(opCtx, deletionTask)); } + + const auto numOrphanDocs = deletionTask.getNumOrphanDocs().value_or(0); + BalancerStatsRegistry::get(opCtx)->onRangeDeletionTaskInsertion( + deletionTask.getCollectionUuid(), numOrphanDocs); } if (nss == NamespaceString::kCollectionCriticalSectionsNamespace && @@ -446,7 +452,8 @@ void ShardServerOpObserver::aboutToDelete(OperationContext* opCtx, const UUID& uuid, BSONObj const& doc) { - if (nss == NamespaceString::kCollectionCriticalSectionsNamespace) { + if (nss == NamespaceString::kCollectionCriticalSectionsNamespace || + nss == NamespaceString::kRangeDeletionNamespace) { documentIdDecoration(opCtx) = doc; } else { // Extract the _id field from the document. If it does not have an _id, use the @@ -527,6 +534,33 @@ void ShardServerOpObserver::onDelete(OperationContext* opCtx, csr->exitCriticalSection(csrLock, reason); }); } + + if (nss == NamespaceString::kRangeDeletionNamespace) { + if (!feature_flags::gOrphanTracking.isEnabled(serverGlobalParams.featureCompatibility)) { + return; + } + + const auto& deletedDoc = documentId; + + const auto numOrphanDocs = [&] { + auto numOrphanDocsElem = update_oplog_entry::extractNewValueForField( + deletedDoc, RangeDeletionTask::kNumOrphanDocsFieldName); + return numOrphanDocsElem ? numOrphanDocsElem.exactNumberLong() : 0; + }(); + + auto collUuid = [&] { + BSONElement collUuidElem; + uassertStatusOK(bsonExtractField( + documentId, RangeDeletionTask::kCollectionUuidFieldName, &collUuidElem)); + return uassertStatusOK(UUID::parse(std::move(collUuidElem))); + }(); + + opCtx->recoveryUnit()->onCommit([opCtx = opCtx, + collUuid = std::move(collUuid), + numOrphanDocs](boost::optional<Timestamp>) { + BalancerStatsRegistry::get(opCtx)->onRangeDeletionTaskDeletion(collUuid, numOrphanDocs); + }); + } } void ShardServerOpObserver::onCreateCollection(OperationContext* opCtx, diff --git a/src/mongo/db/s/shardsvr_get_stats_for_balancing_command.cpp b/src/mongo/db/s/shardsvr_get_stats_for_balancing_command.cpp new file mode 100644 index 00000000000..743e895737f --- /dev/null +++ b/src/mongo/db/s/shardsvr_get_stats_for_balancing_command.cpp @@ -0,0 +1,145 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding + +#include "mongo/db/auth/authorization_session.h" +#include "mongo/db/commands.h" +#include "mongo/db/db_raii.h" +#include "mongo/db/s/balancer_stats_registry.h" +#include "mongo/db/s/get_stats_for_balancing_gen.h" +#include "mongo/db/s/sharding_state.h" +#include "mongo/logv2/log.h" +#include "mongo/s/grid.h" +#include "mongo/s/sharding_feature_flags_gen.h" + +namespace mongo { +namespace { + +class ShardsvrGetStatsForBalancingCmd final : public TypedCommand<ShardsvrGetStatsForBalancingCmd> { +public: + using Request = ShardsvrGetStatsForBalancing; + using Reply = ShardsvrGetStatsForBalancingReply; + + // Default scale factor for data size (MiB) + static constexpr int kDefaultScaleFactorMB{1024 * 1024}; + + bool skipApiVersionCheck() const override { + // Internal command (config -> shard). + return true; + } + + std::string help() const override { + return "Internal command invoked by the config server to retrieve statistics from shard " + "used for balancing"; + } + + AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { + return AllowedOnSecondary::kNever; + } + + bool adminOnly() const override { + return true; + } + + class Invocation final : public InvocationBase { + public: + using InvocationBase::InvocationBase; + + Reply typedRun(OperationContext* opCtx) { + uassertStatusOK(ShardingState::get(opCtx)->canAcceptShardedCommands()); + opCtx->setAlwaysInterruptAtStepDownOrUp(); + + uassert(ErrorCodes::InvalidOptions, + "At least one collection must be specified", + request().getCollections().size()); + + const auto scaleFactor = request().getScaleFactor().get_value_or(kDefaultScaleFactorMB); + std::vector<CollStatsForBalancing> collStats; + collStats.reserve(request().getCollections().size()); + + for (const auto& ns : request().getCollections()) { + const auto collDataSizeScaled = + static_cast<long long>(_getCollDataSizeBytes(opCtx, ns) / scaleFactor); + collStats.emplace_back(ns, collDataSizeScaled); + } + return {std::move(collStats)}; + } + + private: + long long _getCollDataSizeBytes(OperationContext* opCtx, const NamespaceString& ns) const { + boost::optional<UUID> collUUID; + long long numRecords{0}; + long long dataSizeBytes{0}; + + if (AutoGetCollectionForReadCommandMaybeLockFree autoColl{opCtx, ns}) { + collUUID = autoColl->uuid(); + numRecords = autoColl->numRecords(opCtx); + dataSizeBytes = autoColl->dataSize(opCtx); + } + + // If the collection doesn't exists or is empty return 0 + if (dataSizeBytes == 0) { + return 0LL; + } + + const long long numOrphanDocs = [&] { + if (!feature_flags::gOrphanTracking.isEnabled( + serverGlobalParams.featureCompatibility)) { + return 0LL; + } + return BalancerStatsRegistry::get(opCtx)->getCollNumOrphanDocs(*collUUID); + }(); + + invariant(numRecords >= numOrphanDocs); + + const auto avgObjSizeBytes = static_cast<long long>(dataSizeBytes / numRecords); + return avgObjSizeBytes * (numRecords - numOrphanDocs); + } + + NamespaceString ns() const override { + return {request().getDbName(), ""}; + } + + bool supportsWriteConcern() const override { + return false; + } + + void doCheckAuthorization(OperationContext* opCtx) const override { + uassert(ErrorCodes::Unauthorized, + "Unauthorized", + AuthorizationSession::get(opCtx->getClient()) + ->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(), + ActionType::internal)); + } + }; +} _shardsvrGetStatsForBalancingCmd; + +} // namespace +} // namespace mongo |