diff options
Diffstat (limited to 'src/mongo/db/s/balancer_stats_registry.cpp')
-rw-r--r-- | src/mongo/db/s/balancer_stats_registry.cpp | 286 |
1 files changed, 286 insertions, 0 deletions
diff --git a/src/mongo/db/s/balancer_stats_registry.cpp b/src/mongo/db/s/balancer_stats_registry.cpp new file mode 100644 index 00000000000..f96e5e3b3bb --- /dev/null +++ b/src/mongo/db/s/balancer_stats_registry.cpp @@ -0,0 +1,286 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding + +#include "mongo/db/s/balancer_stats_registry.h" + +#include "mongo/db/dbdirectclient.h" +#include "mongo/db/pipeline/aggregate_command_gen.h" +#include "mongo/db/repl/replication_coordinator.h" +#include "mongo/db/s/range_deletion_task_gen.h" +#include "mongo/db/s/range_deletion_util.h" +#include "mongo/db/service_context.h" +#include "mongo/logv2/log.h" +#include "mongo/s/sharding_feature_flags_gen.h" + +namespace mongo { +namespace { + +const auto balancerStatsRegistryDecorator = + ServiceContext::declareDecoration<BalancerStatsRegistry>(); + +/** + * Constructs the default options for the private thread pool used for asyncronous initialization + */ +ThreadPool::Options makeDefaultThreadPoolOptions() { + ThreadPool::Options options; + options.poolName = "BalancerStatsRegistry"; + options.minThreads = 0; + options.maxThreads = 1; + + // Ensure all threads have a client + options.onCreateThread = [](const std::string& threadName) { + Client::initThread(threadName.c_str()); + stdx::lock_guard<Client> lk{cc()}; + cc().setSystemOperationKillableByStepdown(lk); + }; + + return options; +} +} // namespace + +const ReplicaSetAwareServiceRegistry::Registerer<BalancerStatsRegistry> + balancerStatsRegistryRegisterer("BalancerStatsRegistry"); + + +BalancerStatsRegistry* BalancerStatsRegistry::get(ServiceContext* serviceContext) { + return &balancerStatsRegistryDecorator(serviceContext); +} + +BalancerStatsRegistry* BalancerStatsRegistry::get(OperationContext* opCtx) { + return get(opCtx->getServiceContext()); +} + +void BalancerStatsRegistry::onStartup(OperationContext* opCtx) { + _threadPool = std::make_shared<ThreadPool>(makeDefaultThreadPoolOptions()); + _threadPool->startup(); +} + +void BalancerStatsRegistry::onStepUpComplete(OperationContext* opCtx, long long term) { + if (!feature_flags::gOrphanTracking.isEnabled(serverGlobalParams.featureCompatibility)) { + // If future flag is disabled do not initialize the registry + return; + } + initializeAsync(opCtx); +} + +void BalancerStatsRegistry::initializeAsync(OperationContext* opCtx) { + ExecutorFuture<void>(_threadPool) + .then([this] { + // All threads in this pool + auto opCtxHolder{cc().makeOperationContext()}; + opCtxHolder->setAlwaysInterruptAtStepDownOrUp(); + auto opCtx{opCtxHolder.get()}; + + if (!repl::ReplicationCoordinator::get(opCtx)->getMemberState().primary()) { + return; + } + + LOGV2_DEBUG(6419601, 2, "Initializing BalancerStatsRegistry"); + try { + // Lock the range deleter to prevent + // concurrent modifications of orphans count + ScopedRangeDeleterLock rangeDeleterLock(opCtx); + // Load current ophans count from disk + _loadOrphansCount(opCtx); + LOGV2_DEBUG(6419602, 2, "Completed BalancerStatsRegistry initialization"); + // Start accepting updates to the cached orphan count + _isInitialized.store(true); + // Unlock the range deleter (~ScopedRangeDeleterLock) + } catch (const DBException& ex) { + LOGV2_ERROR(6419600, + "Failed to initialize BalancerStatsRegistry after stepUp", + "error"_attr = redact(ex)); + } + }) + .getAsync([](auto) {}); +} + +void BalancerStatsRegistry::terminate() { + // Wait for the asyncronous initialization to complete + _threadPool->waitForIdle(); + // Prevent future usages until next stepup + _isInitialized.store(false); + // Clear the stats + stdx::lock_guard lk{_mutex}; + _collStatsMap.clear(); + LOGV2_DEBUG(6419603, 2, "BalancerStatsRegistry terminated"); +} + +void BalancerStatsRegistry::onStepDown() { + terminate(); +} + +long long BalancerStatsRegistry::getCollNumOrphanDocs(const UUID& collectionUUID) const { + if (!_isInitialized.load()) + uasserted(ErrorCodes::NotYetInitialized, "BalancerStatsRegistry is not initialized"); + + stdx::lock_guard lk{_mutex}; + auto collStats = _collStatsMap.find(collectionUUID); + if (collStats != _collStatsMap.end()) { + return collStats->second.numOrphanDocs; + } + return 0; +} + +void BalancerStatsRegistry::onRangeDeletionTaskInsertion(const UUID& collectionUUID, + long long numOrphanDocs) { + if (!_isInitialized.load()) + return; + + stdx::lock_guard lk{_mutex}; + auto& stats = _collStatsMap[collectionUUID]; + stats.numRangeDeletionTasks += 1; + stats.numOrphanDocs += numOrphanDocs; +} + +void BalancerStatsRegistry::onRangeDeletionTaskDeletion(const UUID& collectionUUID, + long long numOrphanDocs) { + if (!_isInitialized.load()) + return; + + stdx::lock_guard lk{_mutex}; + auto collStatsIt = _collStatsMap.find(collectionUUID); + if (collStatsIt == _collStatsMap.end()) { + LOGV2_ERROR(6419612, + "Couldn't find cached range deletion tasks count during decrese attempt", + "collectionUUID"_attr = collectionUUID, + "numOrphanDocs"_attr = numOrphanDocs); + return; + } + auto& stats = collStatsIt->second; + + stats.numRangeDeletionTasks -= 1; + stats.numOrphanDocs -= numOrphanDocs; + + if (stats.numRangeDeletionTasks <= 0) { + if (MONGO_unlikely(stats.numRangeDeletionTasks < 0)) { + LOGV2_ERROR(6419613, + "Cached count of range deletion tasks became negative. Resetting it to 0", + "collectionUUID"_attr = collectionUUID, + "numRangeDeletionTasks"_attr = stats.numRangeDeletionTasks, + "numOrphanDocs"_attr = stats.numRangeDeletionTasks); + } + _collStatsMap.erase(collStatsIt); + } +} + +void BalancerStatsRegistry::updateOrphansCount(const UUID& collectionUUID, long long delta) { + if (!_isInitialized.load() || delta == 0) + return; + + stdx::lock_guard lk{_mutex}; + if (delta > 0) { + // Increase or create the stats if missing + _collStatsMap[collectionUUID].numOrphanDocs += delta; + } else { + // Decrease orphan docs count + auto collStatsIt = _collStatsMap.find(collectionUUID); + if (collStatsIt == _collStatsMap.end()) { + // This should happen only in case of direct manipulation of range deletion tasks + // documents or direct writes into orphaned ranges + LOGV2_ERROR(6419610, + "Couldn't find cached orphan documents count during decrese attempt", + "collectionUUID"_attr = collectionUUID, + "delta"_attr = delta); + return; + } + auto& stats = collStatsIt->second; + + stats.numOrphanDocs += delta; + + if (stats.numOrphanDocs < 0) { + // This should happen only in case of direct manipulation of range deletion tasks + // documents or direct writes into orphaned ranges + LOGV2_ERROR(6419611, + "Cached orphan documents count became negative, resetting it to 0", + "collectionUUID"_attr = collectionUUID, + "numOrphanDocs"_attr = stats.numOrphanDocs, + "delta"_attr = delta, + "numRangeDeletionTasks"_attr = stats.numRangeDeletionTasks); + stats.numOrphanDocs = 0; + } + } +} + + +void BalancerStatsRegistry::_loadOrphansCount(OperationContext* opCtx) { + static constexpr auto kNumOrphanDocsLabel = "numOrphanDocs"_sd; + static constexpr auto kNumRangeDeletionTasksLabel = "numRangeDeletionTasks"_sd; + + /* + * { + * $group: { + * _id: $collectionUUID, + * numOrphanDocs: {$sum: $numOrphanDocs}, + * numRangeDeletionTasks: {$count: {}}, + * } + * } + */ + static const BSONObj groupStage{ + BSON("$group" << BSON("_id" + << "$" + RangeDeletionTask::kCollectionUuidFieldName + << kNumOrphanDocsLabel + << BSON("$sum" + << "$" + RangeDeletionTask::kNumOrphanDocsFieldName) + << kNumRangeDeletionTasksLabel << BSON("$count" << BSONObj())))}; + AggregateCommandRequest aggRequest{NamespaceString::kRangeDeletionNamespace, {groupStage}}; + + DBDirectClient client{opCtx}; + auto cursor = uassertStatusOK(DBClientCursor::fromAggregationRequest( + &client, std::move(aggRequest), false /* secondaryOk */, true /* useExhaust */)); + + { + stdx::lock_guard lk{_mutex}; + _collStatsMap.clear(); + while (cursor->more()) { + auto collObj = cursor->next(); + auto collUUID = uassertStatusOK(UUID::parse(collObj["_id"])); + auto orphanCount = collObj[kNumOrphanDocsLabel].exactNumberLong(); + auto numRangeDeletionTasks = collObj[kNumRangeDeletionTasksLabel].exactNumberLong(); + invariant(numRangeDeletionTasks > 0); + if (orphanCount < 0) { + LOGV2_ERROR(6419621, + "Found negative orphan count in range deletion task documents", + "collectionUUID"_attr = collUUID, + "numOrphanDocs"_attr = orphanCount, + "numRangeDeletionTasks"_attr = numRangeDeletionTasks); + orphanCount = 0; + } + _collStatsMap.emplace(collUUID, CollectionStats{orphanCount, numRangeDeletionTasks}); + } + LOGV2_DEBUG(6419604, + 2, + "Populated BalancerStatsRegistry cache", + "numCollections"_attr = _collStatsMap.size()); + } +} + +} // namespace mongo |