From db0c685310ef4d928bbfe513b669d8fab8b7bfac Mon Sep 17 00:00:00 2001 From: Paolo Polato Date: Mon, 21 Feb 2022 20:15:28 +0000 Subject: SERVER-63203 Do not fail chunk split if more than 8192 points are requested --- src/mongo/db/s/balancer/balancer.cpp | 8 +++--- src/mongo/db/s/chunk_splitter.cpp | 24 ++++++++++-------- src/mongo/db/s/split_chunk.cpp | 40 ++++++++++++++++-------------- src/mongo/db/s/split_chunk.h | 2 +- src/mongo/db/s/split_chunk_command.cpp | 9 +++++-- src/mongo/s/commands/cluster_split_cmd.cpp | 5 +++- src/mongo/s/shard_util.cpp | 32 +++++++++++++----------- src/mongo/s/shard_util.h | 4 ++- 8 files changed, 72 insertions(+), 52 deletions(-) diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp index d8679530e3d..dc28d73472a 100644 --- a/src/mongo/db/s/balancer/balancer.cpp +++ b/src/mongo/db/s/balancer/balancer.cpp @@ -588,7 +588,7 @@ Status Balancer::_splitChunksIfNeeded(OperationContext* opCtx) { return chunksToSplitStatus.getStatus(); } - for (const auto& splitInfo : chunksToSplitStatus.getValue()) { + for (auto& splitInfo : chunksToSplitStatus.getValue()) { auto routingInfoStatus = Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh( opCtx, splitInfo.nss); @@ -605,7 +605,7 @@ Status Balancer::_splitChunksIfNeeded(OperationContext* opCtx) { cm->getShardKeyPattern(), splitInfo.collectionVersion, ChunkRange(splitInfo.minKey, splitInfo.maxKey), - splitInfo.splitKeys); + &splitInfo.splitKeys); if (!splitStatus.isOK()) { LOGV2_WARNING(21879, "Failed to split chunk {splitInfo} {error}", @@ -692,7 +692,7 @@ void Balancer::_splitOrMarkJumbo(OperationContext* opCtx, auto chunk = cm->findIntersectingChunkWithSimpleCollation(minKey); try { - const auto splitPoints = uassertStatusOK(shardutil::selectChunkSplitPoints( + auto splitPoints = uassertStatusOK(shardutil::selectChunkSplitPoints( opCtx, chunk.getShardId(), nss, @@ -735,7 +735,7 @@ void Balancer::_splitOrMarkJumbo(OperationContext* opCtx, cm->getShardKeyPattern(), cm->getVersion(), ChunkRange(chunk.getMin(), chunk.getMax()), - splitPoints)); + &splitPoints)); } catch (const DBException&) { } } diff --git a/src/mongo/db/s/chunk_splitter.cpp b/src/mongo/db/s/chunk_splitter.cpp index 8bab73b7047..e962cf06112 100644 --- a/src/mongo/db/s/chunk_splitter.cpp +++ b/src/mongo/db/s/chunk_splitter.cpp @@ -52,6 +52,7 @@ #include "mongo/s/config_server_client.h" #include "mongo/s/grid.h" #include "mongo/s/shard_key_pattern.h" +#include "mongo/s/shard_util.h" #include "mongo/util/assert_util.h" namespace mongo { @@ -84,22 +85,23 @@ Status splitChunkAtMultiplePoints(OperationContext* opCtx, const ShardKeyPattern& shardKeyPattern, const ChunkVersion& collectionVersion, const ChunkRange& chunkRange, - const std::vector& splitPoints) { + std::vector&& splitPoints) { invariant(!splitPoints.empty()); - const size_t kMaxSplitPoints = 8192; - - if (splitPoints.size() > kMaxSplitPoints) { - return {ErrorCodes::BadValue, - str::stream() << "Cannot split chunk in more than " << kMaxSplitPoints - << " parts at a time."}; + if (splitPoints.size() > shardutil::kMaxSplitPoints) { + LOGV2_WARNING(6320301, + "Unable to apply all the split points received. 
Only the first " + "kMaxSplitPoints will be processed", + "numSplitPointsReceived"_attr = splitPoints.size(), + "kMaxSplitPoints"_attr = shardutil::kMaxSplitPoints); + splitPoints.resize(shardutil::kMaxSplitPoints); } return splitChunk(opCtx, nss, shardKeyPattern.toBSON(), chunkRange, - splitPoints, + std::move(splitPoints), shardId.toString(), collectionVersion.epoch()) .getStatus() @@ -389,13 +391,15 @@ void ChunkSplitter::_runAutosplit(std::shared_ptr chunkSp } } + auto numSplitPoints = splitPoints.size(); + uassertStatusOK(splitChunkAtMultiplePoints(opCtx.get(), chunk.getShardId(), nss, shardKeyPattern, cm->getVersion(), chunk.getRange(), - splitPoints)); + std::move(splitPoints))); chunkSplitStateDriver->commitSplit(); const bool shouldBalance = isAutoBalanceEnabled(opCtx.get(), nss, balancerConfig); @@ -408,7 +412,7 @@ void ChunkSplitter::_runAutosplit(std::shared_ptr chunkSp "minKey"_attr = redact(chunk.getMin()), "maxKey"_attr = redact(chunk.getMax()), "lastmod"_attr = redact(chunk.getLastmod().toBSON()), - "splitPoints"_attr = splitPoints.size(), + "splitPoints"_attr = numSplitPoints, "maxChunkSizeBytes"_attr = maxChunkSizeBytes, "extraInfo"_attr = (topChunkMinKey.isEmpty() ? "" diff --git a/src/mongo/db/s/split_chunk.cpp b/src/mongo/db/s/split_chunk.cpp index 968a4834419..d4e5b1ef083 100644 --- a/src/mongo/db/s/split_chunk.cpp +++ b/src/mongo/db/s/split_chunk.cpp @@ -90,14 +90,14 @@ bool checkIfSingleDoc(OperationContext* opCtx, /** * Checks the collection's metadata for a successful split on the specified chunkRange using the - * specified splitKeys. Returns false if the metadata's chunks don't match the new chunk + * specified split points. Returns false if the metadata's chunks don't match the new chunk * boundaries exactly. */ bool checkMetadataForSuccessfulSplitChunk(OperationContext* opCtx, const NamespaceString& nss, const OID& epoch, const ChunkRange& chunkRange, - const std::vector& splitKeys) { + const std::vector& splitPoints) { const auto metadataAfterSplit = [&] { AutoGetCollection autoColl(opCtx, nss, MODE_IS); return CollectionShardingState::get(opCtx, nss)->getCollectionDescription(); @@ -107,19 +107,21 @@ bool checkMetadataForSuccessfulSplitChunk(OperationContext* opCtx, str::stream() << "Collection " << nss.ns() << " changed since split start", metadataAfterSplit.getCollVersion().epoch() == epoch); - auto newChunkBounds(splitKeys); - auto startKey = chunkRange.getMin(); - newChunkBounds.push_back(chunkRange.getMax()); - ChunkType nextChunk; - for (const auto& endKey : newChunkBounds) { + for (auto it = splitPoints.begin(); it != splitPoints.end(); ++it) { // Check that all new chunks fit the new chunk boundaries - if (!metadataAfterSplit->getNextChunk(startKey, &nextChunk) || - nextChunk.getMax().woCompare(endKey)) { + const auto& currentChunkMinKey = + it == splitPoints.begin() ? chunkRange.getMin() : *std::prev(it); + const auto& currentChunkMaxKey = *it; + if (!metadataAfterSplit->getNextChunk(currentChunkMinKey, &nextChunk) || + nextChunk.getMax().woCompare(currentChunkMaxKey)) { return false; } - - startKey = endKey; + } + // Special check for the last chunk produced. 
+ if (!metadataAfterSplit->getNextChunk(splitPoints.back(), &nextChunk) || + nextChunk.getMax().woCompare(chunkRange.getMax())) { + return false; } return true; @@ -131,7 +133,7 @@ StatusWith> splitChunk(OperationContext* opCtx, const NamespaceString& nss, const BSONObj& keyPatternObj, const ChunkRange& chunkRange, - const std::vector& splitKeys, + std::vector&& splitPoints, const std::string& shardName, const OID& expectedCollectionEpoch) { auto scopedSplitOrMergeChunk(uassertStatusOK( @@ -141,8 +143,8 @@ StatusWith> splitChunk(OperationContext* opCtx, // data types. const auto hashedField = ShardKeyPattern::extractHashedField(keyPatternObj); if (hashedField) { - for (BSONObj splitKey : splitKeys) { - auto hashedSplitElement = splitKey[hashedField.fieldName()]; + for (const auto& splitPoint : splitPoints) { + auto hashedSplitElement = splitPoint[hashedField.fieldName()]; if (!ShardKeyPattern::isValidHashedValue(hashedSplitElement)) { return {ErrorCodes::CannotSplit, str::stream() << "splitChunk cannot split chunk " << chunkRange.toString() @@ -154,8 +156,8 @@ StatusWith> splitChunk(OperationContext* opCtx, } // Commit the split to the config server. - auto request = - SplitChunkRequest(nss, shardName, expectedCollectionEpoch, chunkRange, splitKeys); + auto request = SplitChunkRequest( + nss, shardName, expectedCollectionEpoch, chunkRange, std::move(splitPoints)); auto configCmdObj = request.toConfigCommandBSON(ShardingCatalogClient::kMajorityWriteConcern.toBSON()); @@ -204,7 +206,7 @@ StatusWith> splitChunk(OperationContext* opCtx, // if (!commandStatus.isOK() || !writeConcernStatus.isOK()) { if (checkMetadataForSuccessfulSplitChunk( - opCtx, nss, expectedCollectionEpoch, chunkRange, splitKeys)) { + opCtx, nss, expectedCollectionEpoch, chunkRange, request.getSplitPoints())) { // Split was committed. 
} else if (!commandStatus.isOK()) { return commandStatus; @@ -233,12 +235,12 @@ StatusWith> splitChunk(OperationContext* opCtx, } auto backChunk = ChunkType(); - backChunk.setMin(splitKeys.back()); + backChunk.setMin(request.getSplitPoints().back()); backChunk.setMax(chunkRange.getMax()); auto frontChunk = ChunkType(); frontChunk.setMin(chunkRange.getMin()); - frontChunk.setMax(splitKeys.front()); + frontChunk.setMax(request.getSplitPoints().front()); KeyPattern shardKeyPattern(keyPatternObj); if (shardKeyPattern.globalMax().woCompare(backChunk.getMax()) == 0 && diff --git a/src/mongo/db/s/split_chunk.h b/src/mongo/db/s/split_chunk.h index 6f394d397e5..16a75f0b94f 100644 --- a/src/mongo/db/s/split_chunk.h +++ b/src/mongo/db/s/split_chunk.h @@ -59,7 +59,7 @@ StatusWith> splitChunk(OperationContext* opCtx, const NamespaceString& nss, const BSONObj& keyPatternObj, const ChunkRange& chunkRange, - const std::vector& splitKeys, + std::vector&& splitPoints, const std::string& shardName, const OID& expectedCollectionEpoch); diff --git a/src/mongo/db/s/split_chunk_command.cpp b/src/mongo/db/s/split_chunk_command.cpp index 9a17b009b2d..483a651a4f1 100644 --- a/src/mongo/db/s/split_chunk_command.cpp +++ b/src/mongo/db/s/split_chunk_command.cpp @@ -139,8 +139,13 @@ public: OID expectedCollectionEpoch; uassertStatusOK(bsonExtractOIDField(cmdObj, "epoch", &expectedCollectionEpoch)); - auto topChunk = uassertStatusOK(splitChunk( - opCtx, nss, keyPatternObj, chunkRange, splitKeys, shardName, expectedCollectionEpoch)); + auto topChunk = uassertStatusOK(splitChunk(opCtx, + nss, + keyPatternObj, + chunkRange, + std::move(splitKeys), + shardName, + expectedCollectionEpoch)); // Otherwise, we want to check whether or not top-chunk optimization should be performed. If // yes, then we should have a ChunkRange that was returned. Regardless of whether it should diff --git a/src/mongo/s/commands/cluster_split_cmd.cpp b/src/mongo/s/commands/cluster_split_cmd.cpp index f5ea1d18fee..cab898fff2a 100644 --- a/src/mongo/s/commands/cluster_split_cmd.cpp +++ b/src/mongo/s/commands/cluster_split_cmd.cpp @@ -262,6 +262,9 @@ public: "namespace"_attr = nss.ns(), "shardId"_attr = chunk->getShardId()); + std::vector splitPoints; + splitPoints.push_back(std::move(splitPoint)); + uassertStatusOK( shardutil::splitChunkAtMultiplePoints(opCtx, chunk->getShardId(), @@ -269,7 +272,7 @@ public: cm->getShardKeyPattern(), cm->getVersion(), ChunkRange(chunk->getMin(), chunk->getMax()), - {splitPoint})); + &splitPoints)); // This invalidation is only necessary so that auto-split can begin to track statistics for // the chunks produced after the split instead of the single original chunk. diff --git a/src/mongo/s/shard_util.cpp b/src/mongo/s/shard_util.cpp index 869750458f1..ec2f65858a0 100644 --- a/src/mongo/s/shard_util.cpp +++ b/src/mongo/s/shard_util.cpp @@ -159,31 +159,35 @@ StatusWith> splitChunkAtMultiplePoints( const ShardKeyPattern& shardKeyPattern, ChunkVersion collectionVersion, const ChunkRange& chunkRange, - const std::vector& splitPoints) { - invariant(!splitPoints.empty()); - - const size_t kMaxSplitPoints = 8192; - - if (splitPoints.size() > kMaxSplitPoints) { - return {ErrorCodes::BadValue, - str::stream() << "Cannot split chunk in more than " << kMaxSplitPoints - << " parts at a time."}; + std::vector* splitPoints) { + invariant(!splitPoints->empty()); + + if (splitPoints->size() > kMaxSplitPoints) { + LOGV2_WARNING(6320300, + "Unable to apply all the split points received. 
Only the first " + "kMaxSplitPoints will be processed", + "numSplitPointsReceived"_attr = splitPoints->size(), + "kMaxSplitPoints"_attr = kMaxSplitPoints); + splitPoints->resize(kMaxSplitPoints); } // Sanity check that we are not attempting to split at the boundaries of the chunk. This check // is already performed at chunk split commit time, but we are performing it here for parity // with old auto-split code, which might rely on it. - if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMin() == splitPoints.front())) { + const auto& firstSplitPoint = splitPoints->front(); + + if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMin() == firstSplitPoint)) { const std::string msg(str::stream() << "not splitting chunk " << chunkRange.toString() << ", split point " - << splitPoints.front() << " is exactly on chunk bounds"); + << firstSplitPoint << " is exactly on chunk bounds"); return {ErrorCodes::CannotSplit, msg}; } - if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMax() == splitPoints.back())) { + const auto& lastSplitPoint = splitPoints->back(); + if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMax() == lastSplitPoint)) { const std::string msg(str::stream() << "not splitting chunk " << chunkRange.toString() << ", split point " - << splitPoints.back() << " is exactly on chunk bounds"); + << lastSplitPoint << " is exactly on chunk bounds"); return {ErrorCodes::CannotSplit, msg}; } @@ -195,7 +199,7 @@ StatusWith> splitChunkAtMultiplePoints( collectionVersion.appendWithField( &cmd, ChunkVersion::kShardVersionField); // backwards compatibility with v3.4 chunkRange.append(&cmd); - cmd.append("splitKeys", splitPoints); + cmd.append("splitKeys", *splitPoints); BSONObj cmdObj = cmd.obj(); diff --git a/src/mongo/s/shard_util.h b/src/mongo/s/shard_util.h index 36e023b134d..4e6beb33a9d 100644 --- a/src/mongo/s/shard_util.h +++ b/src/mongo/s/shard_util.h @@ -51,6 +51,8 @@ class StatusWith; */ namespace shardutil { +static constexpr size_t kMaxSplitPoints = 8192; + /** * Executes the listDatabases command against the specified shard and obtains the total data * size across all databases in bytes (essentially, the totalSize field). @@ -99,7 +101,7 @@ StatusWith> splitChunkAtMultiplePoints( const ShardKeyPattern& shardKeyPattern, ChunkVersion collectionVersion, const ChunkRange& chunkRange, - const std::vector& splitPoints); + std::vector* splitPoints); } // namespace shardutil } // namespace mongo -- cgit v1.2.1
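
Note (illustration, not part of the patch): the change above replaces the old hard failure — returning ErrorCodes::BadValue whenever more than 8192 split points were requested — with best-effort behavior that logs a warning and processes only the first shardutil::kMaxSplitPoints points. A minimal standalone C++ sketch of that truncation logic follows; the capSplitPoints helper and the use of std::cerr are stand-ins for the real LOGV2_WARNING-based code paths and do not exist in the MongoDB source tree.

#include <cstddef>
#include <iostream>
#include <vector>

namespace {

// Mirrors shardutil::kMaxSplitPoints introduced in src/mongo/s/shard_util.h.
constexpr std::size_t kMaxSplitPoints = 8192;

// Hypothetical helper showing the new behavior: instead of rejecting the
// request outright, keep only the first kMaxSplitPoints entries and warn.
template <typename Point>
void capSplitPoints(std::vector<Point>& splitPoints) {
    if (splitPoints.size() > kMaxSplitPoints) {
        std::cerr << "Unable to apply all the split points received. Only the first "
                  << kMaxSplitPoints << " of " << splitPoints.size()
                  << " will be processed" << std::endl;
        splitPoints.resize(kMaxSplitPoints);
    }
}

}  // namespace

int main() {
    std::vector<int> points(10000);  // stand-in for the vector of BSONObj split points
    capSplitPoints(points);
    // points.size() is now 8192; before this commit the caller would instead
    // have received a BadValue error and no split would have been attempted.
    std::cout << points.size() << std::endl;
    return 0;
}

With this behavior the balancer and the auto-splitter degrade gracefully: an oversized split request still produces up to 8192 chunks per pass instead of failing the whole split operation.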