From 7ac8ffa47bfe83e12c361f083fdd96a721ffae60 Mon Sep 17 00:00:00 2001 From: Pierlauro Sciarelli Date: Wed, 21 Dec 2022 10:30:11 +0000 Subject: SERVER-71609 `splitOrMarkJumbo` must not over-split chunks --- src/mongo/db/s/auto_split_vector_command.cpp | 3 ++- .../config/sharding_catalog_manager_chunk_operations.cpp | 12 ++++++++++-- src/mongo/s/request_types/auto_split_vector.idl | 4 ++++ src/mongo/s/shard_util.cpp | 6 ++++-- src/mongo/s/shard_util.h | 16 ++++++++-------- 5 files changed, 28 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/mongo/db/s/auto_split_vector_command.cpp b/src/mongo/db/s/auto_split_vector_command.cpp index ba48fdffed8..2fa8ad9c5c3 100644 --- a/src/mongo/db/s/auto_split_vector_command.cpp +++ b/src/mongo/db/s/auto_split_vector_command.cpp @@ -89,7 +89,8 @@ public: req.getKeyPattern(), req.getMin(), req.getMax(), - req.getMaxChunkSizeBytes()); + req.getMaxChunkSizeBytes(), + req.getLimit()); return Response(std::move(splitPoints), continuation); } diff --git a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp index d622162ef09..a9e62c5a472 100644 --- a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp +++ b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp @@ -1713,13 +1713,18 @@ void ShardingCatalogManager::splitOrMarkJumbo(OperationContext* opCtx, Grid::get(opCtx)->getBalancerConfiguration()->getMaxChunkSizeBytes()); }(); - const auto splitPoints = uassertStatusOK( + // Limit the search to one split point: this code path is reached when a migration fails due + // to ErrorCodes::ChunkTooBig. In case there is a too frequent shard key, only select the + // next key in order to split the range in jumbo chunk + remaining range. 
+ const int limit = 1; + auto splitPoints = uassertStatusOK( shardutil::selectChunkSplitPoints(opCtx, chunk.getShardId(), nss, cm.getShardKeyPattern(), ChunkRange(chunk.getMin(), chunk.getMax()), - maxChunkSizeBytes)); + maxChunkSizeBytes, + limit)); if (splitPoints.empty()) { LOGV2(21873, @@ -1771,6 +1776,9 @@ void ShardingCatalogManager::splitOrMarkJumbo(OperationContext* opCtx, return; } + // Resize the vector because in multiversion scenarios the `autoSplitVector` command may end + // up ignoring the `limit` parameter and returning the whole list of split points. + splitPoints.resize(limit); uassertStatusOK( shardutil::splitChunkAtMultiplePoints(opCtx, chunk.getShardId(), diff --git a/src/mongo/s/request_types/auto_split_vector.idl b/src/mongo/s/request_types/auto_split_vector.idl index be25b1c00ce..50e3c55ad92 100644 --- a/src/mongo/s/request_types/auto_split_vector.idl +++ b/src/mongo/s/request_types/auto_split_vector.idl @@ -75,3 +75,7 @@ commands: maxChunkSizeBytes: type: safeInt64 description: "Max chunk size of the collection expressed in bytes" + limit: + type: int + description: "Max number of split points to look for" + optional: true diff --git a/src/mongo/s/shard_util.cpp b/src/mongo/s/shard_util.cpp index d0f64821611..09f8408de42 100644 --- a/src/mongo/s/shard_util.cpp +++ b/src/mongo/s/shard_util.cpp @@ -134,14 +134,16 @@ StatusWith<std::vector<BSONObj>> selectChunkSplitPoints(OperationContext* opCtx, const NamespaceString& nss, const ShardKeyPattern& shardKeyPattern, const ChunkRange& chunkRange, - long long chunkSizeBytes) { + long long chunkSizeBytes, + boost::optional<int> limit) { auto shardStatus = Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId); if (!shardStatus.isOK()) { return shardStatus.getStatus(); } - const AutoSplitVectorRequest req( + AutoSplitVectorRequest req( nss, shardKeyPattern.toBSON(), chunkRange.getMin(), chunkRange.getMax(), chunkSizeBytes); + req.setLimit(limit); auto cmdStatus = shardStatus.getValue()->runCommandWithFixedRetryAttempts( 
opCtx, diff --git a/src/mongo/s/shard_util.h b/src/mongo/s/shard_util.h index 7558906ab27..0ab14da2811 100644 --- a/src/mongo/s/shard_util.h +++ b/src/mongo/s/shard_util.h @@ -78,20 +78,20 @@ StatusWith<long long> retrieveCollectionShardSize(OperationContext* opCtx, /** * Ask the specified shard to figure out the split points for a given chunk. * - * shardId The shard id to query. - * nss Namespace, which owns the chunk. - * shardKeyPattern The shard key which corresponds to this sharded namespace. - * chunkRange Bounds of the chunk to be split. - * chunkSize Chunk size to target in bytes. - * maxObjs Limits the number of objects in each chunk. Zero means max, unspecified means use the - * server default. + * - shardId: the shard id to query. + * - nss: namespace, which owns the chunk. + * - shardKeyPattern: the shard key which corresponds to this sharded namespace. + * - chunkRange: bounds of the chunk to search for split points on. + * - chunkSizeBytes: chunk size to target in bytes. + * - limit: limits the number of split points to search. Unspecified means no limit */ StatusWith<std::vector<BSONObj>> selectChunkSplitPoints(OperationContext* opCtx, const ShardId& shardId, const NamespaceString& nss, const ShardKeyPattern& shardKeyPattern, const ChunkRange& chunkRange, - long long chunkSizeBytes); + long long chunkSizeBytes, + boost::optional<int> limit = boost::none); /** * Asks the specified shard to split the chunk described by min/maxKey into the respective split -- cgit v1.2.1