author     Simon Gratzer <simon.gratzer@mongodb.com>          2021-05-11 16:51:47 +0200
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>   2021-06-01 14:40:56 +0000
commit     c58adbce4f628f0da863bd779ada0512e5b242ec (patch)
tree       029481487cbb315fd35de4eafeb62ae4c885df6c
parent     eb43e20229bdb20abdcaca1d89b965a22ac44584 (diff)
download   mongo-v4.2.12.tar.gz
SERVER-48653 Return updated ShardVersion in _configsvrCommitChunkSplit to avoid blind metadata refresh (BACKPORT-9049) (tag: v4.2.12)
-rw-r--r--  jstests/sharding/major_version_check.js                              |  12
-rw-r--r--  jstests/sharding/merge_split_chunks_test.js                          | 157
-rw-r--r--  src/mongo/db/s/chunk_splitter.cpp                                     |  25
-rw-r--r--  src/mongo/db/s/config/configsvr_split_chunk_command.cpp              |   6
-rw-r--r--  src/mongo/db/s/config/sharding_catalog_manager.h                      |  17
-rw-r--r--  src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp  |  18
-rw-r--r--  src/mongo/db/s/shard_filtering_metadata_refresh.cpp                  |   3
-rw-r--r--  src/mongo/db/s/split_chunk.cpp                                        |  22
-rw-r--r--  src/mongo/db/s/split_chunk.h                                          |   1
9 files changed, 208 insertions(+), 53 deletions(-)
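
The patch makes _configsvrCommitChunkSplit report the post-split shard version back to the shard, so the shard no longer performs a blind filtering metadata refresh after every split. The new jstest below drives splits, merges and moves through one mongos while reading through a second, stale mongos; the following condensed sketch of that pattern uses only commands that appear in the test itself and assumes the mongo shell's built-in ShardingTest fixture and 4.2-era config.chunks metadata keyed by 'ns':

// Condensed sketch of the pattern exercised by merge_split_chunks_test.js below.
var st = new ShardingTest({shards: 2, mongos: 2});
var admin = st.s0.getDB("admin");
var coll = st.s0.getCollection("foo.bar");
assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""}));
assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: 1}}));
assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 0}}));
// A second (potentially stale) mongos must still return correct results after the split.
assert.eq(0, st.s1.getCollection(coll + "").find().itcount());
assert.eq(2, st.s0.getDB("config").chunks.find({ns: coll + ""}).itcount());
st.stop();
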
diff --git a/jstests/sharding/major_version_check.js b/jstests/sharding/major_version_check.js
index 549b92ea47f..bc930a8f117 100644
--- a/jstests/sharding/major_version_check.js
+++ b/jstests/sharding/major_version_check.js
@@ -100,18 +100,6 @@
printjson(staleMongos.getDB("admin").runCommand({getShardVersion: coll + ""}));
- assert.eq(Timestamp(1, 0),
- staleMongos.getDB("admin").runCommand({getShardVersion: coll + ""}).version);
-
- // See if our stale mongos is required to catch up to run a findOne on a new connection
- staleMongos = new Mongo(staleMongos.host);
- staleMongos.getCollection(coll + "").findOne();
-
- printjson(staleMongos.getDB("admin").runCommand({getShardVersion: coll + ""}));
-
- assert.eq(Timestamp(1, 0),
- staleMongos.getDB("admin").runCommand({getShardVersion: coll + ""}).version);
-
// Run another split on the original chunk, which does not exist anymore (but the stale mongos
// thinks it exists). This should fail and cause a refresh on the shard, updating its shard
// version.
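
The assertions in this test read the routing version a mongos has cached via the getShardVersion command. A minimal sketch of that check, assuming a shell connected to a mongos of a cluster where a collection "foo.bar" (hypothetical namespace) has just been sharded:

// Read the version this mongos has cached for "foo.bar" (assumed namespace).
const res = assert.commandWorked(
    db.getSiblingDB("admin").runCommand({getShardVersion: "foo.bar"}));
printjson(res.version);  // a Timestamp, e.g. Timestamp(1, 0) right after shardCollection
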
diff --git a/jstests/sharding/merge_split_chunks_test.js b/jstests/sharding/merge_split_chunks_test.js
new file mode 100644
index 00000000000..2cc5a69a112
--- /dev/null
+++ b/jstests/sharding/merge_split_chunks_test.js
@@ -0,0 +1,157 @@
+//
+// Tests that merging, splitting and moving chunks via mongos works (or fails as expected) with
+// different chunk configurations.
+//
+(function() {
+'use strict';
+
+/**
+ * Performs a find() on config.chunks on 'configDB', targeting chunks for the collection 'ns',
+ * applying the optional 'extraQuery' and 'projection'.
+ * Chooses to query chunks by their 'ns' or 'uuid' field according to whether the collection's
+ * config.collections entry has a 'timestamp' field or not.
+ */
+let findChunksByNs = function(configDB, ns, extraQuery = null, projection = null) {
+ const chunksQuery = Object.assign({ns: ns}, extraQuery);
+ return configDB.chunks.find(chunksQuery, projection);
+};
+
+var st = new ShardingTest({shards: 2, mongos: 2});
+
+var mongos = st.s0;
+var staleMongos = st.s1;
+var admin = mongos.getDB("admin");
+var coll = mongos.getCollection("foo.bar");
+
+assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""}));
+st.ensurePrimaryShard('foo', st.shard0.shardName);
+assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: 1}}));
+
+// Create ranges MIN->0, 0->10, (hole), 20->40, 40->50, 50->90, (hole), 100->110, 110->MAX on the
+// first shard
+jsTest.log("Creating ranges...");
+
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 0}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 10}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 20}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 40}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 50}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 90}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 100}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 110}}));
+
+assert.commandWorked(
+ admin.runCommand({moveChunk: coll + "", find: {_id: 10}, to: st.shard1.shardName}));
+assert.commandWorked(
+ admin.runCommand({moveChunk: coll + "", find: {_id: 90}, to: st.shard1.shardName}));
+
+st.printShardingStatus();
+
+// Insert some documents (they all land in the top chunk) so counts through the stale mongos can be verified
+let numDocs = 0;
+for (let i = 120; i <= 240; i++) {
+ assert.commandWorked(coll.insert({_id: i}));
+ numDocs++;
+}
+
+var staleCollection = staleMongos.getCollection(coll + "");
+
+// S0: min->0, 0->10, 20->40, 40->50, 50->90, 100->110, 110->max
+// S1: 10->20, 90->100
+assert.eq(9, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+
+jsTest.log("Trying merges that should succeed...");
+
+// Make sure merge including the MinKey works
+assert.commandWorked(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: MinKey}, {_id: 10}]}));
+assert.eq(8, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+assert.eq(numDocs, staleCollection.find().itcount());
+// S0: min->10, 20->40, 40->50, 50->90, 100->110, 110->max
+// S1: 10->20, 90->100
+
+// Make sure merging three chunks in the middle works
+assert.commandWorked(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 20}, {_id: 90}]}));
+assert.eq(6, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+assert.eq(numDocs, staleCollection.find().itcount());
+// S0: min->10, 20->90, 100->110, 110->max
+// S1: 10->20, 90->100
+
+// Make sure splitting chunks after merging works
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 55}}));
+assert.eq(7, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+assert.eq(numDocs, staleCollection.find().itcount());
+// S0: min->10, 20->55, 55->90, 100->110, 110->max
+// S1: 10->20, 90->100
+
+// Make sure moving the newly split chunks works
+assert.commandWorked(
+ admin.runCommand({moveChunk: coll + "", find: {_id: 20}, to: st.shard1.shardName}));
+assert.commandWorked(
+ admin.runCommand({moveChunk: coll + "", find: {_id: 55}, to: st.shard1.shardName}));
+assert.eq(7, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+assert.eq(numDocs, staleCollection.find().itcount());
+// S0: min->10, 100->110, 110->max
+// S1: 10->20, 20->55, 55->90, 90->100
+
+// Make sure merge including the MaxKey works
+assert.commandWorked(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 100}, {_id: MaxKey}]}));
+assert.eq(numDocs, staleCollection.find().itcount());
+assert.eq(6, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+// S0: min->10, 100->max
+// S1: 10->20, 20->55, 55->90, 90->100
+
+// Make sure merging chunks after a chunk has been moved out of a shard succeeds
+assert.commandWorked(
+ admin.runCommand({moveChunk: coll + "", find: {_id: 110}, to: st.shard1.shardName}));
+assert.commandWorked(
+ admin.runCommand({moveChunk: coll + "", find: {_id: 10}, to: st.shard0.shardName}));
+assert.eq(numDocs, staleCollection.find().itcount());
+assert.eq(6, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+// S0: min->10, 10->20
+// S1: 20->55, 55->90, 90->100, 100->max
+
+assert.commandWorked(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 90}, {_id: MaxKey}]}));
+assert.commandWorked(admin.runCommand({mergeChunks: coll + "", bounds: [{_id: 20}, {_id: 90}]}));
+assert.eq(numDocs, staleCollection.find().itcount());
+// S0: min->10, 10->20
+// S1: 20->90, 90->max
+
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 15}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 30}}));
+assert.commandWorked(
+ admin.runCommand({moveChunk: coll + "", find: {_id: 30}, to: st.shard0.shardName}));
+assert.eq(numDocs, staleCollection.find().itcount());
+// S0: min->10, 10->15, 15->20, 30->90
+// S1: 20->30, 90->max
+
+// The range has a hole on shard 0, so this merge must fail
+assert.commandFailed(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: MinKey}, {_id: 90}]}));
+
+// Make sure merge on the other shard after a chunk has been merged succeeds
+assert.commandWorked(
+ admin.runCommand({moveChunk: coll + "", find: {_id: 20}, to: st.shard0.shardName}));
+assert.commandWorked(
+ admin.runCommand({mergeChunks: coll + "", bounds: [{_id: MinKey}, {_id: 90}]}));
+// S0: min->90
+// S1: 90->max
+
+st.printShardingStatus(true);
+
+assert.eq(2, findChunksByNs(st.s0.getDB('config'), 'foo.bar').itcount());
+assert.eq(1, findChunksByNs(st.s0.getDB('config'), 'foo.bar', {
+ 'min._id': MinKey,
+ 'max._id': 90,
+ shard: st.shard0.shardName
+ }).itcount());
+assert.eq(1, findChunksByNs(st.s0.getDB('config'), 'foo.bar', {
+ 'min._id': 90,
+ 'max._id': MaxKey,
+ shard: st.shard1.shardName
+ }).itcount());
+
+st.stop();
+})();
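
For reference, a self-contained usage sketch of the findChunksByNs helper defined at the top of this test, assuming a shell connected to a mongos where 'foo.bar' is sharded; the shard name below is hypothetical:

// Re-declared here so the snippet stands alone; identical to the helper in the test above.
let findChunksByNs = function(configDB, ns, extraQuery = null, projection = null) {
    const chunksQuery = Object.assign({ns: ns}, extraQuery);
    return configDB.chunks.find(chunksQuery, projection);
};
const configDB = db.getSiblingDB("config");
// Count all chunks of foo.bar, then only those on a hypothetical shard "shard0000".
print(findChunksByNs(configDB, "foo.bar").itcount());
print(findChunksByNs(configDB, "foo.bar", {shard: "shard0000"}, {min: 1, max: 1}).itcount());
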
diff --git a/src/mongo/db/s/chunk_splitter.cpp b/src/mongo/db/s/chunk_splitter.cpp
index 58a542c0a07..d09c74eb683 100644
--- a/src/mongo/db/s/chunk_splitter.cpp
+++ b/src/mongo/db/s/chunk_splitter.cpp
@@ -93,15 +93,15 @@ Status splitChunkAtMultiplePoints(OperationContext* opCtx,
<< " parts at a time."};
}
- const auto status = splitChunk(opCtx,
- nss,
- shardKeyPattern.toBSON(),
- chunkRange,
- splitPoints,
- shardId.toString(),
- collectionVersion.epoch());
-
- return status.getStatus().withContext("split failed");
+ return splitChunk(opCtx,
+ nss,
+ shardKeyPattern.toBSON(),
+ chunkRange,
+ splitPoints,
+ shardId.toString(),
+ collectionVersion.epoch())
+ .getStatus()
+ .withContext("split failed");
}
/**
@@ -397,13 +397,6 @@ void ChunkSplitter::_runAutosplit(std::shared_ptr<ChunkSplitStateDriver> chunkSp
: " (top chunk migration suggested" +
(std::string)(shouldBalance ? ")" : ", but no migrations allowed)"));
- // Because the ShardServerOpObserver uses the metadata from the CSS for tracking incoming
- // writes, if we split a chunk but do not force a CSS refresh, subsequent inserts will see
- // stale metadata and so will not trigger a chunk split. If we force metadata refresh here,
- // we can limit the amount of time that the op observer is tracking writes on the parent
- // chunk rather than on its child chunks.
- forceShardFilteringMetadataRefresh(opCtx.get(), nss, false);
-
// Balance the resulting chunks if the autobalance option is enabled and if we split at the
// first or last chunk on the collection as part of top chunk optimization.
if (!shouldBalance || topChunkMinKey.isEmpty()) {
diff --git a/src/mongo/db/s/config/configsvr_split_chunk_command.cpp b/src/mongo/db/s/config/configsvr_split_chunk_command.cpp
index 5451137ad59..c62bca910e9 100644
--- a/src/mongo/db/s/config/configsvr_split_chunk_command.cpp
+++ b/src/mongo/db/s/config/configsvr_split_chunk_command.cpp
@@ -112,14 +112,14 @@ public:
auto parsedRequest = uassertStatusOK(SplitChunkRequest::parseFromConfigCommand(cmdObj));
- Status splitChunkResult =
+ auto shardVers = uassertStatusOK(
ShardingCatalogManager::get(opCtx)->commitChunkSplit(opCtx,
parsedRequest.getNamespace(),
parsedRequest.getEpoch(),
parsedRequest.getChunkRange(),
parsedRequest.getSplitPoints(),
- parsedRequest.getShardName());
- uassertStatusOK(splitChunkResult);
+ parsedRequest.getShardName()));
+ result.appendElements(shardVers);
return true;
}
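
With this change the command reply carries the post-split shard version (result.appendElements(shardVers)). The same minor-version bump is visible in the chunk metadata itself; a rough shell-level check, assuming a collection 'foo.bar' sharded on {_id: 1} as in the tests above and a split point that is not already a chunk boundary:

// After a split routed through mongos, the resulting chunks carry a bumped minor
// version in their "lastmod" field (major/minor encoded as a Timestamp).
const admin = db.getSiblingDB("admin");
assert.commandWorked(admin.runCommand({split: "foo.bar", middle: {_id: 0}}));
db.getSiblingDB("config")
    .chunks.find({ns: "foo.bar"}, {min: 1, max: 1, shard: 1, lastmod: 1})
    .forEach(printjson);
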
diff --git a/src/mongo/db/s/config/sharding_catalog_manager.h b/src/mongo/db/s/config/sharding_catalog_manager.h
index 4e5f0d61241..21e6121cb5d 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager.h
+++ b/src/mongo/db/s/config/sharding_catalog_manager.h
@@ -179,13 +179,16 @@ public:
/**
* Updates metadata in the config.chunks collection to show the given chunk as split into
* smaller chunks at the specified split points.
- */
- Status commitChunkSplit(OperationContext* opCtx,
- const NamespaceString& nss,
- const OID& requestEpoch,
- const ChunkRange& range,
- const std::vector<BSONObj>& splitPoints,
- const std::string& shardName);
+ *
+ * Returns a BSON object with the newly produced chunk version after the split:
+ * - shardVersion - The new shard version of the shard that owned the split chunk
+ */
+ StatusWith<BSONObj> commitChunkSplit(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const OID& requestEpoch,
+ const ChunkRange& range,
+ const std::vector<BSONObj>& splitPoints,
+ const std::string& shardName);
/**
* Updates metadata in the config.chunks collection so the chunks with given boundaries are seen
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
index f07c54d6341..68d291d91dc 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
@@ -287,12 +287,13 @@ StatusWith<ChunkVersion> getMaxChunkVersionFromQueryResponse(
} // namespace
-Status ShardingCatalogManager::commitChunkSplit(OperationContext* opCtx,
- const NamespaceString& nss,
- const OID& requestEpoch,
- const ChunkRange& range,
- const std::vector<BSONObj>& splitPoints,
- const std::string& shardName) {
+StatusWith<BSONObj> ShardingCatalogManager::commitChunkSplit(
+ OperationContext* opCtx,
+ const NamespaceString& nss,
+ const OID& requestEpoch,
+ const ChunkRange& range,
+ const std::vector<BSONObj>& splitPoints,
+ const std::string& shardName) {
// Take _kChunkOpLock in exclusive mode to prevent concurrent chunk splits, merges, and
// migrations
// TODO(SERVER-25359): Replace with a collection-specific lock map to allow splits/merges/
@@ -523,7 +524,10 @@ Status ShardingCatalogManager::commitChunkSplit(OperationContext* opCtx,
}
}
- return Status::OK();
+    // currentMaxVersion holds the shard's new version, with the minor version incremented by the split
+ BSONObjBuilder result;
+ currentMaxVersion.appendToCommand(&result);
+ return result.obj();
}
Status ShardingCatalogManager::commitChunkMerge(OperationContext* opCtx,
diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
index 2d8b2359378..49f5b486afa 100644
--- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
+++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
@@ -82,8 +82,7 @@ void onShardVersionMismatch(OperationContext* opCtx,
}();
if (currentShardVersion) {
- if (currentShardVersion->epoch() == shardVersionReceived.epoch() &&
- currentShardVersion->majorVersion() >= shardVersionReceived.majorVersion()) {
+ if (shardVersionReceived.isOlderThan(*currentShardVersion)) {
// Don't need to remotely reload if we're in the same epoch and the requested version is
// smaller than the one we know about. This means that the remote side is behind.
return;
diff --git a/src/mongo/db/s/split_chunk.cpp b/src/mongo/db/s/split_chunk.cpp
index 9e98c376ede..71bd707ae03 100644
--- a/src/mongo/db/s/split_chunk.cpp
+++ b/src/mongo/db/s/split_chunk.cpp
@@ -188,9 +188,22 @@ StatusWith<boost::optional<ChunkRange>> splitChunk(OperationContext* opCtx,
return cmdResponseStatus.getStatus();
}
+    // Older config servers might not include the shardVersion field in the response
+ const Shard::CommandResponse& cmdResponse = cmdResponseStatus.getValue();
+ if (cmdResponse.response[ChunkVersion::kShardVersionField]) {
+ const auto cv = uassertStatusOK(
+ ChunkVersion::parseWithField(cmdResponse.response, ChunkVersion::kShardVersionField));
+ uassertStatusOK(onShardVersionMismatchNoExcept(
+ opCtx, nss, std::move(cv), true /* forceRefreshFromThisThread */));
+ } else {
+ // Refresh metadata to pick up new chunk definitions (regardless of the results returned
+        // from running _configsvrCommitChunkSplit).
+ forceShardFilteringMetadataRefresh(opCtx, nss, true /* forceRefreshFromThisThread */);
+ }
+
// Check commandStatus and writeConcernStatus
- auto commandStatus = cmdResponseStatus.getValue().commandStatus;
- auto writeConcernStatus = cmdResponseStatus.getValue().writeConcernStatus;
+ auto commandStatus = cmdResponse.commandStatus;
+ auto writeConcernStatus = cmdResponse.writeConcernStatus;
// Send stale epoch if epoch of request did not match epoch of collection
if (commandStatus == ErrorCodes::StaleEpoch) {
@@ -198,14 +211,12 @@ StatusWith<boost::optional<ChunkRange>> splitChunk(OperationContext* opCtx,
}
//
- // If _configsvrCommitChunkSplit returned an error, refresh and look at the metadata to
+ // If _configsvrCommitChunkSplit returned an error, look at the metadata to
// determine if the split actually did happen. This can happen if there's a network error
// getting the response from the first call to _configsvrCommitChunkSplit, but it actually
// succeeds, thus the automatic retry fails with a precondition violation, for example.
//
if (!commandStatus.isOK() || !writeConcernStatus.isOK()) {
- forceShardFilteringMetadataRefresh(opCtx, nss);
-
if (checkMetadataForSuccessfulSplitChunk(
opCtx, nss, expectedCollectionEpoch, chunkRange, splitKeys)) {
// Split was committed.
@@ -249,7 +260,6 @@ StatusWith<boost::optional<ChunkRange>> splitChunk(OperationContext* opCtx,
checkIfSingleDoc(opCtx, collection, idx, &frontChunk)) {
return boost::optional<ChunkRange>(ChunkRange(frontChunk.getMin(), frontChunk.getMax()));
}
-
return boost::optional<ChunkRange>(boost::none);
}
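
The branch added above refreshes with the version returned by the config server when it is present and falls back to a full forced refresh against older config servers. A shell-level illustration of that decision, where the reply shape (a version Timestamp plus an epoch ObjectId under "shardVersion") is an assumption made for the example only:

// Illustrative only: mirrors the shardVersion/no-shardVersion branch in splitChunk().
function handleSplitCommitReply(reply) {
    if (reply.hasOwnProperty("shardVersion")) {
        // Newer config servers: a targeted refresh using the reported version suffices.
        print("config reported shard version: " + tojson(reply.shardVersion));
    } else {
        // Older config servers: no version in the reply, force a full metadata refresh.
        print("no shardVersion in reply; forcing a full filtering metadata refresh");
    }
}
handleSplitCommitReply({ok: 1, shardVersion: [Timestamp(1, 3), ObjectId("5f2d8f3e9a1b2c3d4e5f6a7b")]});
handleSplitCommitReply({ok: 1});
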
diff --git a/src/mongo/db/s/split_chunk.h b/src/mongo/db/s/split_chunk.h
index aaa074f2f55..6f394d397e5 100644
--- a/src/mongo/db/s/split_chunk.h
+++ b/src/mongo/db/s/split_chunk.h
@@ -48,6 +48,7 @@ class StatusWith;
* Attempts to split a chunk with the specified parameters. If the split fails, then the StatusWith
* object returned will contain a Status with an ErrorCode regarding the cause of failure. If the
* split succeeds, then the StatusWith object returned will contain Status::Ok().
+ * Will update the shard's filtering metadata.
*
* Additionally, splitChunk will attempt to perform top-chunk optimization. If top-chunk
* optimization is performed, then the function will also return a ChunkRange, which contains the