From e1f5f40fc17f99fc06dda4621564db7e31be1132 Mon Sep 17 00:00:00 2001 From: Kevin Pulo Date: Sun, 16 Jul 2017 11:07:49 +1000 Subject: SERVER-20392 remove early chunksize autosplit heuristic Plus some additional 3.4-specific jstest fixes. (cherry picked from commit ad6a668da49c61a4276749aef7529088dc3524ea) --- ...harding_last_stable_mongos_and_mixed_shards.yml | 2 + jstests/sharding/auth.js | 3 +- jstests/sharding/autosplit.js | 7 +- .../sharding/shard_existing_coll_chunk_count.js | 165 +++++++++++++++++++++ jstests/sharding/write_cmd_auto_split.js | 6 +- src/mongo/s/commands/cluster_write.cpp | 25 +--- 6 files changed, 179 insertions(+), 29 deletions(-) create mode 100644 jstests/sharding/shard_existing_coll_chunk_count.js diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml index f4c741badd4..c7ff1b24395 100644 --- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml +++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml @@ -95,6 +95,8 @@ selector: # TODO (SERVER-27379): Cannot create views when featureCompatibilityVersion is 3.2. # Unblacklist when the featureCompatibilityVersion is 3.4 or higher. - jstests/sharding/views.js + # SERVER-20392: Reenable when backported to 3.4 and released as last-stable. + - jstests/sharding/shard_existing_coll_chunk_count.js executor: js_test: diff --git a/jstests/sharding/auth.js b/jstests/sharding/auth.js index 61b25f10dde..037532acaec 100644 --- a/jstests/sharding/auth.js +++ b/jstests/sharding/auth.js @@ -38,7 +38,7 @@ name: "auth", mongos: 1, shards: 0, - other: {keyFile: "jstests/libs/key1", chunkSize: 1, enableAutoSplit: true}, + other: {keyFile: "jstests/libs/key1", chunkSize: 1, enableAutoSplit: false}, }); if (s.getDB('admin').runCommand('buildInfo').bits < 64) { @@ -167,6 +167,7 @@ s.getDB("test").foo.remove({}); var num = 10000; + assert.commandWorked(s.s.adminCommand({split: "test.foo", middle: {x: num / 2}})); var bulk = s.getDB("test").foo.initializeUnorderedBulkOp(); for (i = 0; i < num; i++) { bulk.insert( diff --git a/jstests/sharding/autosplit.js b/jstests/sharding/autosplit.js index bb34021487f..0eba386b6a3 100644 --- a/jstests/sharding/autosplit.js +++ b/jstests/sharding/autosplit.js @@ -4,7 +4,12 @@ (function() { 'use strict'; - var s = new ShardingTest({name: "auto1", shards: 2, mongos: 1, other: {enableAutoSplit: true}}); + var s = new ShardingTest({ + name: "auto1", + shards: 2, + mongos: 1, + other: {enableAutoSplit: true, chunkSize: 10}, + }); assert.commandWorked(s.s0.adminCommand({enablesharding: "test"})); s.ensurePrimaryShard('test', 'shard0001'); diff --git a/jstests/sharding/shard_existing_coll_chunk_count.js b/jstests/sharding/shard_existing_coll_chunk_count.js new file mode 100644 index 00000000000..60145fef712 --- /dev/null +++ b/jstests/sharding/shard_existing_coll_chunk_count.js @@ -0,0 +1,165 @@ +/** + * This test confirms that after sharding a collection with some pre-existing data, + * the resulting chunks aren't auto-split too aggressively. 
+ */ +(function() { + 'use strict'; + + var s = new ShardingTest({ + name: "shard_existing_coll_chunk_count", + shards: 1, + mongos: 1, + other: {enableAutoSplit: true}, + }); + + assert.commandWorked(s.s.adminCommand({enablesharding: "test"})); + + var collNum = 0; + var overhead = Object.bsonsize({_id: ObjectId(), i: 1, pad: ""}); + + var getNumberChunks = function(ns) { + return s.configRS.getPrimary().getDB("config").getCollection("chunks").count({ns}); + }; + + var runCase = function(opts) { + // Expected options. + assert.gte(opts.docSize, 0); + assert.gte(opts.stages.length, 2); + + // Compute padding. + if (opts.docSize < overhead) { + var pad = ""; + } else { + var pad = (new Array(opts.docSize - overhead + 1)).join(' '); + } + + collNum++; + var db = s.getDB("test"); + var collName = "coll" + collNum; + var coll = db.getCollection(collName); + var i = 0; + var limit = 0; + var stageNum = 0; + var stage = opts.stages[stageNum]; + + // Insert initial docs. + var bulk = coll.initializeUnorderedBulkOp(); + limit += stage.numDocsToInsert; + for (; i < limit; i++) { + bulk.insert({i, pad}); + } + assert.writeOK(bulk.execute()); + + // Create shard key index. + assert.commandWorked(coll.createIndex({i: 1})); + + // Shard collection. + assert.commandWorked(s.s.adminCommand({shardcollection: coll.getFullName(), key: {i: 1}})); + + // Confirm initial number of chunks. + var numChunks = getNumberChunks(coll.getFullName()); + assert.eq(numChunks, + stage.expectedNumChunks, + 'in ' + coll.getFullName() + ' expected ' + stage.expectedNumChunks + + ' initial chunks, but found ' + numChunks + '\nopts: ' + tojson(opts) + + '\nchunks:\n' + s.getChunksString(coll.getFullName())); + + // Do the rest of the stages. + for (stageNum = 1; stageNum < opts.stages.length; stageNum++) { + stage = opts.stages[stageNum]; + + // Insert the later docs (one at a time, to maximise the autosplit effects). + limit += stage.numDocsToInsert; + for (; i < limit; i++) { + coll.insert({i, pad}); + } + + // Confirm number of chunks for this stage. + var numChunks = getNumberChunks(coll.getFullName()); + assert.eq(numChunks, + stage.expectedNumChunks, + 'in ' + coll.getFullName() + ' expected ' + stage.expectedNumChunks + + ' chunks for stage ' + stageNum + ', but found ' + numChunks + + '\nopts: ' + tojson(opts) + '\nchunks:\n' + + s.getChunksString(coll.getFullName())); + } + }; + + // Original problematic case. + runCase({ + docSize: 0, + stages: [ + {numDocsToInsert: 20000, expectedNumChunks: 1}, + {numDocsToInsert: 7, expectedNumChunks: 1}, + {numDocsToInsert: 1000, expectedNumChunks: 1}, + ], + }); + + // Original problematic case (worse). + runCase({ + docSize: 0, + stages: [ + {numDocsToInsert: 90000, expectedNumChunks: 1}, + {numDocsToInsert: 7, expectedNumChunks: 1}, + {numDocsToInsert: 1000, expectedNumChunks: 1}, + ], + }); + + // Pathological case #1. + runCase({ + docSize: 522, + stages: [ + {numDocsToInsert: 8191, expectedNumChunks: 1}, + {numDocsToInsert: 2, expectedNumChunks: 1}, + {numDocsToInsert: 1000, expectedNumChunks: 1}, + ], + }); + + // Pathological case #2. + runCase({ + docSize: 522, + stages: [ + {numDocsToInsert: 8192, expectedNumChunks: 1}, + {numDocsToInsert: 8192, expectedNumChunks: 1}, + ], + }); + + // Lower chunksize to 1MB, and restart the mongos for it to take. + assert.writeOK( + s.getDB("config").getCollection("settings").update({_id: "chunksize"}, {$set: {value: 1}}, { + upsert: true + })); + s.restartMongos(0); + + // Original problematic case, scaled down to smaller chunksize. 
+ runCase({ + docSize: 0, + stages: [ + {numDocsToInsert: 10000, expectedNumChunks: 1}, + {numDocsToInsert: 10, expectedNumChunks: 1}, + {numDocsToInsert: 20, expectedNumChunks: 1}, + {numDocsToInsert: 40, expectedNumChunks: 1}, + {numDocsToInsert: 1000, expectedNumChunks: 1}, + ], + }); + + // Docs just smaller than half chunk size. + runCase({ + docSize: 510 * 1024, + stages: [ + {numDocsToInsert: 10, expectedNumChunks: 6}, + {numDocsToInsert: 10, expectedNumChunks: 12}, + ], + }); + + // Docs just larger than half chunk size. + runCase({ + docSize: 514 * 1024, + stages: [ + {numDocsToInsert: 10, expectedNumChunks: 10}, + {numDocsToInsert: 10, expectedNumChunks: 20}, + ], + }); + + s.stop(); +})(); diff --git a/jstests/sharding/write_cmd_auto_split.js b/jstests/sharding/write_cmd_auto_split.js index 1cf9b5ab39a..95151b1e7e9 100644 --- a/jstests/sharding/write_cmd_auto_split.js +++ b/jstests/sharding/write_cmd_auto_split.js @@ -40,7 +40,7 @@ assert.eq(1, configDB.chunks.find().itcount()); - for (var x = 0; x < 1100; x++) { + for (var x = 0; x < 3100; x++) { assert.writeOK(testDB.runCommand({ update: 'update', updates: [{q: {x: x}, u: {x: x, v: doc1k}, upsert: true}], @@ -80,7 +80,7 @@ // Note: Estimated 'chunk size' tracked by mongos is initialized with a random value so // we are going to be conservative. - for (var x = 0; x < 1100; x += 400) { + for (var x = 0; x < 3100; x += 400) { var docs = []; for (var y = 0; y < 400; y++) { @@ -101,7 +101,7 @@ assert.eq(1, configDB.chunks.find().itcount()); - for (var x = 0; x < 1100; x += 400) { + for (var x = 0; x < 3100; x += 400) { var docs = []; for (var y = 0; y < 400; y++) { diff --git a/src/mongo/s/commands/cluster_write.cpp b/src/mongo/s/commands/cluster_write.cpp index b17035993d9..565a1ddc4d7 100644 --- a/src/mongo/s/commands/cluster_write.cpp +++ b/src/mongo/s/commands/cluster_write.cpp @@ -65,28 +65,6 @@ void toBatchError(const Status& status, BatchedCommandResponse* response) { dassert(response->isValid(NULL)); } -/** - * Given a maxChunkSize configuration and the number of chunks in a particular sharded collection, - * returns an optimal chunk size to use in order to achieve a good ratio between number of chunks - * and their size. - */ -uint64_t calculateDesiredChunkSize(uint64_t maxChunkSizeBytes, uint64_t numChunks) { - // Splitting faster in early chunks helps spread out an initial load better - const uint64_t minChunkSize = 1 << 20; // 1 MBytes - - if (numChunks <= 1) { - return 1024; - } else if (numChunks < 3) { - return minChunkSize / 2; - } else if (numChunks < 10) { - return std::max(maxChunkSizeBytes / 4, minChunkSize); - } else if (numChunks < 20) { - return std::max(maxChunkSizeBytes / 2, minChunkSize); - } else { - return maxChunkSizeBytes; - } -} - /** * Returns the split point that will result in one of the chunk having exactly one document. Also * returns an empty document if the split point cannot be determined. @@ -341,8 +319,7 @@ void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* opCtx, const uint64_t chunkBytesWritten = chunk->addBytesWritten(dataWritten); - const uint64_t desiredChunkSize = - calculateDesiredChunkSize(balancerConfig->getMaxChunkSizeBytes(), manager->numChunks()); + const uint64_t desiredChunkSize = balancerConfig->getMaxChunkSizeBytes(); // If this chunk is at either end of the range, trigger auto-split at 10% less data written in // order to trigger the top-chunk optimization. -- cgit v1.2.1
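Note (not part of the patch): the heuristic deleted from cluster_write.cpp is what made
early autosplitting so aggressive. It scaled the split threshold by the collection's
current number of chunks, so a collection that was sharded with pre-existing data (and
therefore had exactly one chunk) would be split after only about 1 KB of further writes,
regardless of the configured chunk size. The standalone sketch below re-implements the
deleted calculateDesiredChunkSize() purely for comparison with the post-patch behaviour;
the 64 MB figure is assumed here as the default max chunk size and is for illustration only.

#include <algorithm>
#include <cstdint>
#include <initializer_list>
#include <iostream>

// Copied from the lines this patch deletes in cluster_write.cpp.
uint64_t calculateDesiredChunkSize(uint64_t maxChunkSizeBytes, uint64_t numChunks) {
    // Splitting faster in early chunks helps spread out an initial load better
    const uint64_t minChunkSize = 1 << 20;  // 1 MBytes

    if (numChunks <= 1) {
        return 1024;
    } else if (numChunks < 3) {
        return minChunkSize / 2;
    } else if (numChunks < 10) {
        return std::max(maxChunkSizeBytes / 4, minChunkSize);
    } else if (numChunks < 20) {
        return std::max(maxChunkSizeBytes / 2, minChunkSize);
    } else {
        return maxChunkSizeBytes;
    }
}

int main() {
    const uint64_t maxChunkSizeBytes = 64ULL << 20;  // assumed default of 64 MB

    // Compare the pre-patch threshold (chunk-count dependent) with the
    // post-patch threshold (always the configured maximum).
    for (uint64_t numChunks : {1, 2, 5, 15, 25}) {
        std::cout << "numChunks=" << numChunks
                  << "  old desiredChunkSize=" << calculateDesiredChunkSize(maxChunkSizeBytes, numChunks)
                  << "  new desiredChunkSize=" << maxChunkSizeBytes << '\n';
    }
    return 0;
}

With the patch applied, only the right-hand column (the configured maximum) is used as
the autosplit threshold, which is what the "original problematic" cases in
shard_existing_coll_chunk_count.js assert by expecting a freshly sharded, pre-populated
collection to remain a single chunk.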