From c58adbce4f628f0da863bd779ada0512e5b242ec Mon Sep 17 00:00:00 2001
From: Simon Gratzer
Date: Tue, 11 May 2021 16:51:47 +0200
Subject: SERVER-48653 Return updated ShardVersion in _configsvrCommitChunkSplit to avoid blind metadata refresh (BACKPORT-9049)

---
 jstests/sharding/major_version_check.js            |  12 --
 jstests/sharding/merge_split_chunks_test.js        | 157 +++++++++++++++++++++
 src/mongo/db/s/chunk_splitter.cpp                  |  25 ++--
 .../db/s/config/configsvr_split_chunk_command.cpp  |   6 +-
 src/mongo/db/s/config/sharding_catalog_manager.h   |  17 ++-
 .../sharding_catalog_manager_chunk_operations.cpp  |  18 ++-
 .../db/s/shard_filtering_metadata_refresh.cpp      |   3 +-
 src/mongo/db/s/split_chunk.cpp                     |  22 ++-
 src/mongo/db/s/split_chunk.h                       |   1 +
 9 files changed, 208 insertions(+), 53 deletions(-)
 create mode 100644 jstests/sharding/merge_split_chunks_test.js

diff --git a/jstests/sharding/major_version_check.js b/jstests/sharding/major_version_check.js
index 549b92ea47f..bc930a8f117 100644
--- a/jstests/sharding/major_version_check.js
+++ b/jstests/sharding/major_version_check.js
@@ -100,18 +100,6 @@
 
 printjson(staleMongos.getDB("admin").runCommand({getShardVersion: coll + ""}));
 
-assert.eq(Timestamp(1, 0),
-          staleMongos.getDB("admin").runCommand({getShardVersion: coll + ""}).version);
-
-// See if our stale mongos is required to catch up to run a findOne on a new connection
-staleMongos = new Mongo(staleMongos.host);
-staleMongos.getCollection(coll + "").findOne();
-
-printjson(staleMongos.getDB("admin").runCommand({getShardVersion: coll + ""}));
-
-assert.eq(Timestamp(1, 0),
-          staleMongos.getDB("admin").runCommand({getShardVersion: coll + ""}).version);
-
 // Run another split on the original chunk, which does not exist anymore (but the stale mongos
 // thinks it exists). This should fail and cause a refresh on the shard, updating its shard
 // version.
diff --git a/jstests/sharding/merge_split_chunks_test.js b/jstests/sharding/merge_split_chunks_test.js
new file mode 100644
index 00000000000..2cc5a69a112
--- /dev/null
+++ b/jstests/sharding/merge_split_chunks_test.js
@@ -0,0 +1,157 @@
+//
+// Tests that merging, splitting and moving chunks via mongos succeeds or fails as expected with
+// different chunk configurations
+//
+(function() {
+'use strict';
+
+/**
+ * Performs a find() on config.chunks on 'configDB', targeting chunks for the collection 'ns'
+ * and applying the optional 'extraQuery' and 'projection'. Chunks are matched by their 'ns'
+ * field.
+ */
+let findChunksByNs = function(configDB, ns, extraQuery = null, projection = null) {
+    const chunksQuery = Object.assign({ns: ns}, extraQuery);
+    return configDB.chunks.find(chunksQuery, projection);
+};
+
+var st = new ShardingTest({shards: 2, mongos: 2});
+
+var mongos = st.s0;
+var staleMongos = st.s1;
+var admin = mongos.getDB("admin");
+var coll = mongos.getCollection("foo.bar");
+
+assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""}));
+st.ensurePrimaryShard('foo', st.shard0.shardName);
+assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: 1}}));
+
+// Create ranges MIN->0,0->10,(hole),20->40,40->50,50->90,(hole),100->110,110->MAX on first
+// shard
+jsTest.log("Creating ranges...");
+
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 0}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 10}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 20}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 40}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 50}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 90}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 100}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 110}}));
+
+assert.commandWorked(
+    admin.runCommand({moveChunk: coll + "", find: {_id: 10}, to: st.shard1.shardName}));
+assert.commandWorked(
+    admin.runCommand({moveChunk: coll + "", find: {_id: 90}, to: st.shard1.shardName}));
+
+st.printShardingStatus();
+
+// Insert some data into each of the consolidated ranges
+let numDocs = 0;
+for (let i = 120; i <= 240; i++) {
+    assert.commandWorked(coll.insert({_id: i}));
+    numDocs++;
+}
+
+var staleCollection = staleMongos.getCollection(coll + "");
+
+// S0: min->0, 0->10, 20->40, 40->50, 50->90, 100->110, 110->max
+// S1: 10->20, 90->100
+assert.eq(9, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+
+jsTest.log("Trying merges that should succeed...");
+
+// Make sure merge including the MinKey works
+assert.commandWorked(
+    admin.runCommand({mergeChunks: coll + "", bounds: [{_id: MinKey}, {_id: 10}]}));
+assert.eq(8, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+assert.eq(numDocs, staleCollection.find().itcount());
+// S0: min->10, 20->40, 40->50, 50->90, 100->110, 110->max
+// S1: 10->20, 90->100
+
+// Make sure merging three chunks in the middle works
+assert.commandWorked(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 20}, {_id: 90}]}));
+assert.eq(6, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+assert.eq(numDocs, staleCollection.find().itcount());
+// S0: min->10, 20->90, 100->110, 110->max
+// S1: 10->20, 90->100
+
+// Make sure splitting chunks after merging works
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 55}}));
+assert.eq(7, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+assert.eq(numDocs, staleCollection.find().itcount());
+// S0: min->10, 20->55, 55->90, 100->110, 110->max
+// S1: 10->20, 90->100
+
+// make sure moving the new chunk works
+assert.commandWorked(
+    admin.runCommand({moveChunk: coll + "", find: {_id: 20}, to: st.shard1.shardName}));
+assert.commandWorked(
+    admin.runCommand({moveChunk: coll + "", find: {_id: 55}, to: st.shard1.shardName}));
+assert.eq(7, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+assert.eq(numDocs, staleCollection.find().itcount());
+// S0: min->10, 100->110, 110->max
+// S1: 10->20, 20->55, 55->90, 90->100
+
+// Make sure merge including the MaxKey works
+assert.commandWorked(
+    admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 100}, {_id: MaxKey}]}));
+assert.eq(numDocs, staleCollection.find().itcount());
+assert.eq(6, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+// S0: min->10, 100->max
+// S1: 10->20, 20->55, 55->90, 90->100
+
+// Make sure merging chunks after a chunk has been moved out of a shard succeeds
+assert.commandWorked(
+    admin.runCommand({moveChunk: coll + "", find: {_id: 110}, to: st.shard1.shardName}));
+assert.commandWorked(
+    admin.runCommand({moveChunk: coll + "", find: {_id: 10}, to: st.shard0.shardName}));
+assert.eq(numDocs, staleCollection.find().itcount());
+assert.eq(6, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+// S0: min->10, 10->20
+// S1: 20->55, 55->90, 90->100, 100->max
+
+assert.commandWorked(
+    admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 90}, {_id: MaxKey}]}));
+assert.commandWorked(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 20}, {_id: 90}]}));
+assert.eq(numDocs, staleCollection.find().itcount());
+// S0: min->10, 10->20
+// S1: 20->90, 90->max
+
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 15}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 30}}));
+assert.commandWorked(
+    admin.runCommand({moveChunk: coll + "", find: {_id: 30}, to: st.shard0.shardName}));
+assert.eq(numDocs, staleCollection.find().itcount());
+// S0: min->10, 10->15, 15->20, 30->90
+// S1: 20->30, 90->max
+
+// The range has a hole on shard 0
+assert.commandFailed(
+    admin.runCommand({mergeChunks: coll + "", bounds: [{_id: MinKey}, {_id: 90}]}));
+
+// Make sure merge on the other shard after a chunk has been merged succeeds
+assert.commandWorked(
+    admin.runCommand({moveChunk: coll + "", find: {_id: 20}, to: st.shard0.shardName}));
+assert.commandWorked(
+    admin.runCommand({mergeChunks: coll + "", bounds: [{_id: MinKey}, {_id: 90}]}));
+// S0: min->90
+// S1: 90->max
+
+st.printShardingStatus(true);
+
+assert.eq(2, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+assert.eq(1, findChunksByNs(st.s0.getDB('config'), 'foo.bar', {
+                'min._id': MinKey,
+                'max._id': 90,
+                shard: st.shard0.shardName
+            }).itcount());
+assert.eq(1, findChunksByNs(st.s0.getDB('config'), 'foo.bar', {
+                'min._id': 90,
+                'max._id': MaxKey,
+                shard: st.shard1.shardName
+            }).itcount());
+
+st.stop();
+})();
diff --git a/src/mongo/db/s/chunk_splitter.cpp b/src/mongo/db/s/chunk_splitter.cpp
index 58a542c0a07..d09c74eb683 100644
--- a/src/mongo/db/s/chunk_splitter.cpp
+++ b/src/mongo/db/s/chunk_splitter.cpp
@@ -93,15 +93,15 @@ Status splitChunkAtMultiplePoints(OperationContext* opCtx,
                               << " parts at a time."};
     }
 
-    const auto status = splitChunk(opCtx,
-                                   nss,
-                                   shardKeyPattern.toBSON(),
-                                   chunkRange,
-                                   splitPoints,
-                                   shardId.toString(),
-                                   collectionVersion.epoch());
-
-    return status.getStatus().withContext("split failed");
+    return splitChunk(opCtx,
+                      nss,
+                      shardKeyPattern.toBSON(),
+                      chunkRange,
+                      splitPoints,
+                      shardId.toString(),
+                      collectionVersion.epoch())
+        .getStatus()
+        .withContext("split failed");
 }
 
 /**
@@ -397,13 +397,6 @@ void ChunkSplitter::_runAutosplit(std::shared_ptr<ChunkSplitStateDriver> chunkSp
                       : " (top chunk migration suggested" +
                           (std::string)(shouldBalance ? ")" : ", but no migrations allowed)"));
")" : ", but no migrations allowed)")); - // Because the ShardServerOpObserver uses the metadata from the CSS for tracking incoming - // writes, if we split a chunk but do not force a CSS refresh, subsequent inserts will see - // stale metadata and so will not trigger a chunk split. If we force metadata refresh here, - // we can limit the amount of time that the op observer is tracking writes on the parent - // chunk rather than on its child chunks. - forceShardFilteringMetadataRefresh(opCtx.get(), nss, false); - // Balance the resulting chunks if the autobalance option is enabled and if we split at the // first or last chunk on the collection as part of top chunk optimization. if (!shouldBalance || topChunkMinKey.isEmpty()) { diff --git a/src/mongo/db/s/config/configsvr_split_chunk_command.cpp b/src/mongo/db/s/config/configsvr_split_chunk_command.cpp index 5451137ad59..c62bca910e9 100644 --- a/src/mongo/db/s/config/configsvr_split_chunk_command.cpp +++ b/src/mongo/db/s/config/configsvr_split_chunk_command.cpp @@ -112,14 +112,14 @@ public: auto parsedRequest = uassertStatusOK(SplitChunkRequest::parseFromConfigCommand(cmdObj)); - Status splitChunkResult = + auto shardVers = uassertStatusOK( ShardingCatalogManager::get(opCtx)->commitChunkSplit(opCtx, parsedRequest.getNamespace(), parsedRequest.getEpoch(), parsedRequest.getChunkRange(), parsedRequest.getSplitPoints(), - parsedRequest.getShardName()); - uassertStatusOK(splitChunkResult); + parsedRequest.getShardName())); + result.appendElements(shardVers); return true; } diff --git a/src/mongo/db/s/config/sharding_catalog_manager.h b/src/mongo/db/s/config/sharding_catalog_manager.h index 4e5f0d61241..21e6121cb5d 100644 --- a/src/mongo/db/s/config/sharding_catalog_manager.h +++ b/src/mongo/db/s/config/sharding_catalog_manager.h @@ -179,13 +179,16 @@ public: /** * Updates metadata in the config.chunks collection to show the given chunk as split into * smaller chunks at the specified split points. 
-     */
-    Status commitChunkSplit(OperationContext* opCtx,
-                            const NamespaceString& nss,
-                            const OID& requestEpoch,
-                            const ChunkRange& range,
-                            const std::vector<BSONObj>& splitPoints,
-                            const std::string& shardName);
+     *
+     * Returns a BSON object with the newly produced chunk version after the split:
+     *   - shardVersion - The new shard version of the source shard
+     */
+    StatusWith<BSONObj> commitChunkSplit(OperationContext* opCtx,
+                                         const NamespaceString& nss,
+                                         const OID& requestEpoch,
+                                         const ChunkRange& range,
+                                         const std::vector<BSONObj>& splitPoints,
+                                         const std::string& shardName);
 
     /**
      * Updates metadata in the config.chunks collection so the chunks with given boundaries are seen
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
index f07c54d6341..68d291d91dc 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
@@ -287,12 +287,13 @@ StatusWith<ChunkVersion> getMaxChunkVersionFromQueryResponse(
 
 }  // namespace
 
-Status ShardingCatalogManager::commitChunkSplit(OperationContext* opCtx,
-                                                const NamespaceString& nss,
-                                                const OID& requestEpoch,
-                                                const ChunkRange& range,
-                                                const std::vector<BSONObj>& splitPoints,
-                                                const std::string& shardName) {
+StatusWith<BSONObj> ShardingCatalogManager::commitChunkSplit(
+    OperationContext* opCtx,
+    const NamespaceString& nss,
+    const OID& requestEpoch,
+    const ChunkRange& range,
+    const std::vector<BSONObj>& splitPoints,
+    const std::string& shardName) {
     // Take _kChunkOpLock in exclusive mode to prevent concurrent chunk splits, merges, and
     // migrations
     // TODO(SERVER-25359): Replace with a collection-specific lock map to allow splits/merges/
@@ -523,7 +524,10 @@ Status ShardingCatalogManager::commitChunkSplit(OperationContext* opCtx,
         }
     }
 
-    return Status::OK();
+    // currentMaxVersion contains the new shard version with the incremented minor version
+    BSONObjBuilder result;
+    currentMaxVersion.appendToCommand(&result);
+    return result.obj();
 }
 
 Status ShardingCatalogManager::commitChunkMerge(OperationContext* opCtx,
diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
index 2d8b2359378..49f5b486afa 100644
--- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
+++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
@@ -82,8 +82,7 @@ void onShardVersionMismatch(OperationContext* opCtx,
     }();
 
     if (currentShardVersion) {
-        if (currentShardVersion->epoch() == shardVersionReceived.epoch() &&
-            currentShardVersion->majorVersion() >= shardVersionReceived.majorVersion()) {
+        if (shardVersionReceived.isOlderThan(*currentShardVersion)) {
            // Don't need to remotely reload if we're in the same epoch and the requested version is
            // smaller than the one we know about. This means that the remote side is behind.
            return;
diff --git a/src/mongo/db/s/split_chunk.cpp b/src/mongo/db/s/split_chunk.cpp
index 9e98c376ede..71bd707ae03 100644
--- a/src/mongo/db/s/split_chunk.cpp
+++ b/src/mongo/db/s/split_chunk.cpp
@@ -188,9 +188,22 @@ StatusWith<boost::optional<ChunkRange>> splitChunk(OperationContext* opCtx,
         return cmdResponseStatus.getStatus();
     }
 
+    // Old versions might not have the shardVersion field in the response
+    const Shard::CommandResponse& cmdResponse = cmdResponseStatus.getValue();
+    if (cmdResponse.response[ChunkVersion::kShardVersionField]) {
+        const auto cv = uassertStatusOK(
+            ChunkVersion::parseWithField(cmdResponse.response, ChunkVersion::kShardVersionField));
+        uassertStatusOK(onShardVersionMismatchNoExcept(
+            opCtx, nss, std::move(cv), true /* forceRefreshFromThisThread */));
+    } else {
+        // Refresh metadata to pick up new chunk definitions (regardless of the results returned
+        // from running _configsvrCommitChunkSplit).
+        forceShardFilteringMetadataRefresh(opCtx, nss, true /* forceRefreshFromThisThread */);
+    }
+
     // Check commandStatus and writeConcernStatus
-    auto commandStatus = cmdResponseStatus.getValue().commandStatus;
-    auto writeConcernStatus = cmdResponseStatus.getValue().writeConcernStatus;
+    auto commandStatus = cmdResponse.commandStatus;
+    auto writeConcernStatus = cmdResponse.writeConcernStatus;
 
     // Send stale epoch if epoch of request did not match epoch of collection
     if (commandStatus == ErrorCodes::StaleEpoch) {
@@ -198,14 +211,12 @@ StatusWith<boost::optional<ChunkRange>> splitChunk(OperationContext* opCtx,
     }
 
     //
-    // If _configsvrCommitChunkSplit returned an error, refresh and look at the metadata to
+    // If _configsvrCommitChunkSplit returned an error, look at the metadata to
    // determine if the split actually did happen. This can happen if there's a network error
    // getting the response from the first call to _configsvrCommitChunkSplit, but it actually
    // succeeds, thus the automatic retry fails with a precondition violation, for example.
    //
    if (!commandStatus.isOK() || !writeConcernStatus.isOK()) {
-        forceShardFilteringMetadataRefresh(opCtx, nss);
-
        if (checkMetadataForSuccessfulSplitChunk(
                opCtx, nss, expectedCollectionEpoch, chunkRange, splitKeys)) {
            // Split was committed.
@@ -249,7 +260,6 @@ StatusWith<boost::optional<ChunkRange>> splitChunk(OperationContext* opCtx,
        checkIfSingleDoc(opCtx, collection, idx, &frontChunk)) {
        return boost::optional<ChunkRange>(ChunkRange(frontChunk.getMin(), frontChunk.getMax()));
    }
-
    return boost::optional<ChunkRange>(boost::none);
 }
 
diff --git a/src/mongo/db/s/split_chunk.h b/src/mongo/db/s/split_chunk.h
index aaa074f2f55..6f394d397e5 100644
--- a/src/mongo/db/s/split_chunk.h
+++ b/src/mongo/db/s/split_chunk.h
@@ -48,6 +48,7 @@ class StatusWith;
 * Attempts to split a chunk with the specified parameters. If the split fails, then the StatusWith
 * object returned will contain a Status with an ErrorCode regarding the cause of failure. If the
 * split succeeds, then the StatusWith object returned will contain Status::OK().
+ * Will update the shard's filtering metadata.
 *
 * Additionally, splitChunk will attempt to perform top-chunk optimization. If top-chunk
 * optimization is performed, then the function will also return a ChunkRange, which contains the
-- 
cgit v1.2.1
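
A minimal, self-contained sketch of the flow this patch introduces, for readers skimming the diff:
the config server's split commit now hands the incremented shard version back to the shard, and the
shard acts on it only when its cached version is older, instead of unconditionally forcing a
filtering-metadata refresh after every autosplit. The types and functions below (ToyChunkVersion,
CommitResult, commitChunkSplit, onCommitResponse) are illustrative stand-ins, not the real
ChunkVersion/BSONObj classes, and the direct install in onCommitResponse collapses what the real
code does through onShardVersionMismatchNoExcept.

    #include <iostream>
    #include <optional>

    // Stand-in for ChunkVersion: (major, minor) under a fixed collection epoch.
    struct ToyChunkVersion {
        int major = 0;
        int minor = 0;

        bool isOlderThan(const ToyChunkVersion& other) const {
            return major < other.major || (major == other.major && minor < other.minor);
        }
    };

    // What the split commit now reports back: the version of the last chunk written by
    // the split, i.e. the shard's new (minor-bumped) shard version.
    struct CommitResult {
        ToyChunkVersion shardVersion;
    };

    // Config-server side: each new chunk produced by the split gets the next minor version.
    CommitResult commitChunkSplit(ToyChunkVersion currentMax, int numSplitPoints) {
        for (int i = 0; i < numSplitPoints; ++i) {
            ++currentMax.minor;
        }
        return {currentMax};
    }

    // Shard side: act on the returned version only if the cached one is older, rather than
    // discarding the cached metadata and doing a blind refresh after every split.
    void onCommitResponse(std::optional<ToyChunkVersion>& cached, const CommitResult& result) {
        if (!cached || cached->isOlderThan(result.shardVersion)) {
            cached = result.shardVersion;
        }
    }

    int main() {
        std::optional<ToyChunkVersion> cachedShardVersion = ToyChunkVersion{1, 0};
        const auto result = commitChunkSplit(/*currentMax=*/{1, 0}, /*numSplitPoints=*/2);
        onCommitResponse(cachedShardVersion, result);
        std::cout << "cached shard version: " << cachedShardVersion->major << "."
                  << cachedShardVersion->minor << "\n";  // prints 1.2
        return 0;
    }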