author     Cheahuychou Mao <mao.cheahuychou@gmail.com>       2020-10-28 15:03:36 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-10-28 17:38:04 +0000
commit     c6635ba1c55a508eeb35e82277c732d4caa9cea9 (patch)
tree       6ac51bee29c86f32142792366bdc0dcdb897f0bf
parent     100c645fe1655c03c9fcb478f402a973e5c39278 (diff)
download   mongo-c6635ba1c55a508eeb35e82277c732d4caa9cea9.tar.gz
SERVER-45624 Make the balancer split the sessions collection
(cherry picked from commit a8f80d013ee948e04671b1814d9f3989f6ea8314)
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml |   2
-rw-r--r--  jstests/sharding/balancing_sessions_collection.js                                  | 159
-rw-r--r--  src/mongo/db/s/balancer/balancer.cpp                                               |   8
-rw-r--r--  src/mongo/db/s/balancer/balancer.h                                                 |  11
-rw-r--r--  src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp                   | 148
-rw-r--r--  src/mongo/shell/servers.js                                                         |  18
6 files changed, 315 insertions, 31 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index c47c11a6eec..42affa9b642 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -28,6 +28,8 @@ selector:
- jstests/sharding/index_and_collection_option_propagation.js
# New feature in v3.6 mongos
- jstests/sharding/logical_time_metadata.js
+ # New feature in v3.6 mongod.
+ - jstests/sharding/balancing_sessions_collection.js
# New feature in v3.6 mongos and mongod.
- jstests/sharding/advance_cluster_time_action_type.js
- jstests/sharding/advance_logical_time_with_valid_signature.js
diff --git a/jstests/sharding/balancing_sessions_collection.js b/jstests/sharding/balancing_sessions_collection.js
new file mode 100644
index 00000000000..17292d13c9b
--- /dev/null
+++ b/jstests/sharding/balancing_sessions_collection.js
@@ -0,0 +1,159 @@
+/*
+ * Tests that the balancer splits the sessions collection and uniformly distributes the chunks
+ * across shards in the cluster.
+ * @tags: [resource_intensive]
+ */
+(function() {
+ "use strict";
+
+ /*
+ * Returns the number of chunks for the sessions collection.
+ */
+ function getNumTotalChunks() {
+ return configDB.chunks.count({ns: kSessionsNs});
+ }
+
+ /*
+ * Returns the number of chunks for the sessions collection that are on the given shard.
+ */
+ function getNumChunksOnShard(shardName) {
+ return configDB.chunks.count({ns: kSessionsNs, shard: shardName});
+ }
+
+ /*
+ * Returns the number of docs in the sessions collection on the given host.
+ */
+ function getNumDocs(conn) {
+ return conn.getCollection(kSessionsNs).count();
+ }
+
+ /*
+ * Starts a replica-set shard, adds the shard to the cluster, and increments numShards.
+ * Returns the ReplSetTest object for the shard.
+ */
+ function addShardToCluster() {
+ const shardName = clusterName + "-rs" + numShards;
+
+ const replTest = new ReplSetTest({name: shardName, nodes: 1});
+ replTest.startSet({shardsvr: ""});
+ replTest.initiate();
+
+ assert.commandWorked(st.s.adminCommand({addShard: replTest.getURL(), name: shardName}));
+ numShards++;
+ return replTest;
+ }
+
+ /*
+ * Removes the given shard from the cluster, waits until the state is completed, and
+ * decrements numShards.
+ */
+ function removeShardFromCluster(shardName) {
+ assert.commandWorked(st.s.adminCommand({removeShard: shardName}));
+ assert.soon(function() {
+ const res = st.s.adminCommand({removeShard: shardName});
+ assert.commandWorked(res);
+ return ("completed" == res.state);
+ }, "failed to remove shard " + shardName, kBalancerTimeoutMS);
+ numShards--;
+ }
+
+ /*
+ * Returns true if the chunks for the sessions collection are evenly distributed across the
+ * given shards. That is, the number of chunks on the most loaded shard and on the least
+ * loaded shard differs by no more than 1.
+ */
+ function isBalanced(shardNames) {
+ const expectedMinNumChunksPerShard = Math.floor(kExpectedNumChunks / shardNames.length);
+
+ let minNumChunks = Number.MAX_VALUE;
+ let maxNumChunks = 0;
+ for (let shardName of shardNames) {
+ const numChunks = getNumChunksOnShard(shardName);
+ minNumChunks = Math.min(numChunks, minNumChunks);
+ maxNumChunks = Math.max(numChunks, maxNumChunks);
+ }
+
+ return (maxNumChunks - minNumChunks <= 1) && (minNumChunks == expectedMinNumChunksPerShard);
+ }
+
+ /*
+ * Returns the standard deviation of the given numbers.
+ */
+ function computeStdDev(nums) {
+ const mean = nums.reduce((a, b) => a + b) / nums.length;
+ return Math.sqrt(nums.map(x => Math.pow(x - mean, 2)).reduce((a, b) => a + b) /
+ nums.length);
+ }
+
+ const kMinNumChunks = 100;
+ const kExpectedNumChunks = 128;  // The balancer rounds kMinNumChunks up to the next power of 2.
+ const kNumSessions = 10000;
+ const kBalancerTimeoutMS = 5 * 60 * 1000;
+
+ let numShards = 2;
+ const clusterName = jsTest.name();
+ const st = new ShardingTest({
+ name: clusterName,
+ shards: numShards,
+ other:
+ {configOptions: {setParameter: {minNumChunksForSessionsCollection: kMinNumChunks}}}
+ });
+ const kSessionsNs = "config.system.sessions";
+ const configDB = st.s.getDB("config");
+
+ // There is only one chunk initially.
+ assert.eq(1, getNumTotalChunks());
+
+ st.startBalancer();
+
+ jsTest.log(
+ "Verify that the balancer splits the initial chunks and distributes chunks evenly across existing shards");
+
+ assert.soon(() => getNumTotalChunks() == kExpectedNumChunks,
+ "balancer did not split the initial chunk for the sessions collection");
+ assert.soon(() => isBalanced([st.shard0.shardName, st.shard1.shardName]),
+ "balancer did not distribute chunks evenly across existing shards",
+ kBalancerTimeoutMS);
+
+ jsTest.log(
+ "Verify that the balancer redistributes chunks when more shards are added to the cluster");
+ const shard2 = addShardToCluster();
+ const shard3 = addShardToCluster();
+ const shard4 = addShardToCluster();
+
+ assert.soon(
+ () => isBalanced(
+ [st.shard0.shardName, st.shard1.shardName, shard2.name, shard3.name, shard4.name]),
+ "balancer did not redistribute chunks evenly after more shards were added",
+ kBalancerTimeoutMS);
+
+ jsTest.log("Verify that the session docs are distributed almost evenly across shards");
+ // Start sessions and trigger a refresh to flush the sessions to the sessions collection.
+ for (let i = 0; i < kNumSessions; i++) {
+ assert.commandWorked(st.s.adminCommand({startSession: 1}));
+ }
+ assert.commandWorked(st.s.adminCommand({refreshLogicalSessionCacheNow: 1}));
+ assert.lte(kNumSessions, getNumDocs(st.s));
+
+ const shards =
+ [st.shard0, st.shard1, shard2.getPrimary(), shard3.getPrimary(), shard4.getPrimary()];
+ const numDocsOnShards = shards.map(shard => getNumDocs(shard));
+ assert.lt(computeStdDev(numDocsOnShards), 0.1 * kNumSessions / shards.length);
+
+ jsTest.log(
+ "Verify that the balancer redistributes chunks when shards are removed from the cluster");
+ removeShardFromCluster(shard2.name);
+
+ assert.soon(
+ () => isBalanced([st.shard0.shardName, st.shard1.shardName, shard3.name, shard4.name]),
+ "balancer did not redistribute chunks evenly after one of the shards was removed",
+ kBalancerTimeoutMS);
+ assert.eq(0, getNumChunksOnShard(shard2.name));
+
+ st.stopBalancer();
+
+ st.stop();
+ shard2.stopSet();
+ shard3.stopSet();
+ shard4.stopSet();
+}());
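Note on the constants in the test above: kMinNumChunks is 100 and the balancer rounds it up to the next power of two, hence kExpectedNumChunks is 128, and the final document-distribution assertion tolerates a standard deviation of at most 10% of the per-shard average. A minimal standalone sketch of that arithmetic (not part of the patch; runnable in the mongo shell or any JavaScript shell that provides print):

```javascript
// Standalone sketch (not part of the patch): the arithmetic behind the test's expectations.
const kMinNumChunks = 100;
const numBits = 32 - Math.clz32(kMinNumChunks - 1);  // 7, since 2^7 = 128 >= 100
const kExpectedNumChunks = 1 << numBits;              // 128, the next power of 2 >= 100

const kNumSessions = 10000;
const numShards = 5;
// The final assertion allows the per-shard session counts a standard deviation of
// at most 10% of the per-shard average.
const stdDevBound = 0.1 * kNumSessions / numShards;   // 200 documents

print(kExpectedNumChunks);  // 128
print(stdDevBound);         // 200
```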
diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp
index 6caddbe97f5..41367734c8d 100644
--- a/src/mongo/db/s/balancer/balancer.cpp
+++ b/src/mongo/db/s/balancer/balancer.cpp
@@ -367,9 +367,9 @@ void Balancer::_mainThread() {
OCCASIONALLY warnOnMultiVersion(
uassertStatusOK(_clusterStats->getStats(opCtx.get())));
- Status status = _enforceTagRanges(opCtx.get());
+ Status status = _splitChunksIfNeeded(opCtx.get());
if (!status.isOK()) {
- warning() << "Failed to enforce tag ranges" << causedBy(status);
+ warning() << "Failed to split chunks" << causedBy(status);
} else {
LOG(1) << "Done enforcing tag range boundaries.";
}
@@ -540,7 +540,7 @@ bool Balancer::_checkOIDs(OperationContext* opCtx) {
return true;
}
-Status Balancer::_enforceTagRanges(OperationContext* opCtx) {
+Status Balancer::_splitChunksIfNeeded(OperationContext* opCtx) {
auto chunksToSplitStatus = _chunkSelectionPolicy->selectChunksToSplit(opCtx);
if (!chunksToSplitStatus.isOK()) {
return chunksToSplitStatus.getStatus();
@@ -565,7 +565,7 @@ Status Balancer::_enforceTagRanges(OperationContext* opCtx) {
ChunkRange(splitInfo.minKey, splitInfo.maxKey),
splitInfo.splitKeys);
if (!splitStatus.isOK()) {
- warning() << "Failed to enforce tag range for chunk " << redact(splitInfo.toString())
+ warning() << "Failed to split chunk " << redact(splitInfo.toString())
<< causedBy(redact(splitStatus.getStatus()));
}
}
diff --git a/src/mongo/db/s/balancer/balancer.h b/src/mongo/db/s/balancer/balancer.h
index da54de484ac..6fd8b7fbc4b 100644
--- a/src/mongo/db/s/balancer/balancer.h
+++ b/src/mongo/db/s/balancer/balancer.h
@@ -183,10 +183,15 @@ private:
bool _checkOIDs(OperationContext* opCtx);
/**
- * Iterates through all chunks in all collections and ensures that no chunks straddle tag
- * boundary. If any do, they will be split.
+ * Iterates through all chunks in all collections. If the collection is the sessions collection,
+ * checks if the number of chunks is greater than or equal to the configured minimum number of
+ * chunks for the sessions collection (minNumChunksForSessionsCollection). If it isn't,
+ * calculates split points that evenly partition the key space into N ranges (where N is
+ * minNumChunksForSessionsCollection rounded up to the next power of 2), and splits any chunks that
+ * straddle those split points. If the collection is any other collection, splits any chunks
+ * that straddle tag boundaries.
*/
- Status _enforceTagRanges(OperationContext* opCtx);
+ Status _splitChunksIfNeeded(OperationContext* opCtx);
/**
* Schedules migrations for the specified set of chunks and returns how many chunks were
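The "configured minimum" mentioned in the new docstring is the minNumChunksForSessionsCollection server parameter introduced in the next file. A hedged usage sketch (not part of the patch), assuming the parameter is settable both at startup and at runtime, which is the default behavior for MONGO_EXPORT_SERVER_PARAMETER, and that it is applied on the config servers where the balancer runs:

```javascript
// Usage sketch, not part of the patch. At startup, the way the new jstest sets it:
const st = new ShardingTest({
    shards: 2,
    other: {configOptions: {setParameter: {minNumChunksForSessionsCollection: 100}}}
});

// At runtime, against the config server primary (assumes the parameter is
// runtime-settable, the default for MONGO_EXPORT_SERVER_PARAMETER).
assert.commandWorked(st.configRS.getPrimary().adminCommand(
    {setParameter: 1, minNumChunksForSessionsCollection: 256}));

st.stop();
```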
diff --git a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp
index 25b328b3fd4..47610d5bec3 100644
--- a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp
+++ b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_impl.cpp
@@ -39,6 +39,8 @@
#include "mongo/base/status_with.h"
#include "mongo/bson/bsonobj_comparator_interface.h"
+#include "mongo/db/server_parameters.h"
+#include "mongo/platform/bits.h"
#include "mongo/s/catalog/sharding_catalog_client.h"
#include "mongo/s/catalog/type_chunk.h"
#include "mongo/s/catalog/type_collection.h"
@@ -51,6 +53,20 @@
namespace mongo {
+// The minimum number of chunks for the config.system.sessions collection.
+MONGO_EXPORT_SERVER_PARAMETER(minNumChunksForSessionsCollection, int, 1024)
+ ->withValidator([](const int& newVal) {
+ if (newVal < 1) {
+ return Status(ErrorCodes::BadValue,
+ "minNumChunksForSessionsCollection must not be less than 1");
+ }
+ if (newVal > 1000000) {
+ return Status(ErrorCodes::BadValue,
+ "minNumChunksForSessionsCollection must not be greater than 1000000");
+ }
+ return Status::OK();
+ });
+
using MigrateInfoVector = BalancerChunkSelectionPolicy::MigrateInfoVector;
using SplitInfoVector = BalancerChunkSelectionPolicy::SplitInfoVector;
using std::shared_ptr;
@@ -177,6 +193,95 @@ private:
BSONObjIndexedMap<BalancerChunkSelectionPolicy::SplitInfo> _chunkSplitPoints;
};
+/**
+ * Populates splitCandidates with chunk and splitPoint pairs for chunks that violate tag
+ * range boundaries.
+ */
+void getSplitCandidatesToEnforceTagRanges(const ChunkManager* cm,
+ const DistributionStatus& distribution,
+ SplitCandidatesBuffer* splitCandidates) {
+ const auto& globalMax = cm->getShardKeyPattern().getKeyPattern().globalMax();
+
+ // For each tag range, find chunks that need to be split.
+ for (const auto& tagRangeEntry : distribution.tagRanges()) {
+ const auto& tagRange = tagRangeEntry.second;
+
+ const auto chunkAtZoneMin = cm->findIntersectingChunkWithSimpleCollation(tagRange.min);
+ invariant(chunkAtZoneMin->getMax().woCompare(tagRange.min) > 0);
+
+ if (chunkAtZoneMin->getMin().woCompare(tagRange.min)) {
+ splitCandidates->addSplitPoint(chunkAtZoneMin, tagRange.min);
+ }
+
+ // The global max key can never fall in the middle of a chunk.
+ if (!tagRange.max.woCompare(globalMax))
+ continue;
+
+ const auto chunkAtZoneMax = cm->findIntersectingChunkWithSimpleCollation(tagRange.max);
+
+ // We need to check that both the chunk's minKey does not match the zone's max and also that
+ // the max is not equal, which would only happen in the case of the zone ending in MaxKey.
+ if (chunkAtZoneMax->getMin().woCompare(tagRange.max) &&
+ chunkAtZoneMax->getMax().woCompare(tagRange.max)) {
+ splitCandidates->addSplitPoint(chunkAtZoneMax, tagRange.max);
+ }
+ }
+}
+
+/**
+ * If the number of chunks as given by the ChunkManager is less than the configured minimum
+ * number of chunks for the sessions collection (minNumChunksForSessionsCollection), calculates
+ * split points that evenly partition the key space into N ranges (where N is
+ * minNumChunksForSessionsCollection rounded up to the next power of 2), and populates
+ * splitCandidates with chunk and splitPoint pairs for chunks that need to split.
+ */
+void getSplitCandidatesForSessionsCollection(OperationContext* opCtx,
+ const ChunkManager* cm,
+ SplitCandidatesBuffer* splitCandidates) {
+ const auto minNumChunks = minNumChunksForSessionsCollection.load();
+
+ if (cm->numChunks() >= minNumChunks) {
+ return;
+ }
+
+ // Use the next power of 2 as the target number of chunks.
+ const size_t numBits = 64 - countLeadingZeros64(minNumChunks - 1);
+ const uint32_t numChunks = 1 << numBits;
+
+ // Compute split points for _id.id that partition the UUID 128-bit data space into numChunks
+ // equal ranges. Since the numChunks is a power of 2, the split points are the permutations
+ // of the prefix numBits right-padded with 0's.
+ std::vector<BSONObj> splitPoints;
+ for (uint32_t i = 1; i < numChunks; i++) {
+ // Start with a buffer of 0's.
+ std::array<uint8_t, 16> buf{0b0};
+
+ // Left-shift i to fill the remaining bits in the prefix 32 bits with 0's.
+ const uint32_t high = i << (CHAR_BIT * 4 - numBits);
+
+ // Fill the prefix 4 bytes with high's bytes.
+ buf[0] = static_cast<uint8_t>(high >> CHAR_BIT * 3);
+ buf[1] = static_cast<uint8_t>(high >> CHAR_BIT * 2);
+ buf[2] = static_cast<uint8_t>(high >> CHAR_BIT * 1);
+ buf[3] = static_cast<uint8_t>(high);
+
+ ConstDataRange cdr(reinterpret_cast<const char*>(buf.data()), sizeof(buf));
+ splitPoints.push_back(BSON("_id" << BSON("id" << UUID::fromCDR(cdr))));
+ }
+
+ // For each split point, find a chunk that needs to be split.
+ for (auto& splitPoint : splitPoints) {
+ const auto chunkAtSplitPoint = cm->findIntersectingChunkWithSimpleCollation(splitPoint);
+ invariant(chunkAtSplitPoint->getMax().woCompare(splitPoint) > 0);
+
+ if (chunkAtSplitPoint->getMin().woCompare(splitPoint)) {
+ splitCandidates->addSplitPoint(chunkAtSplitPoint, splitPoint);
+ }
+ }
+
+ return;
+}
+
} // namespace
BalancerChunkSelectionPolicyImpl::BalancerChunkSelectionPolicyImpl(ClusterStatistics* clusterStats,
@@ -222,8 +327,13 @@ StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::selectChunksToSpli
// Namespace got dropped before we managed to get to it, so just skip it
continue;
} else if (!candidatesStatus.isOK()) {
- warning() << "Unable to enforce tag range policy for collection " << nss.ns()
- << causedBy(candidatesStatus.getStatus());
+ if (nss.ns() == "config.system.sessions") {
+ warning() << "Unable to split sessions collection chunks "
+ << causedBy(candidatesStatus.getStatus());
+ } else {
+ warning() << "Unable to enforce tag range policy for collection " << nss.ns()
+ << causedBy(candidatesStatus.getStatus());
+ }
continue;
}
@@ -377,8 +487,6 @@ StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::_getSplitCandidate
const auto cm = routingInfoStatus.getValue().cm().get();
- const auto& shardKeyPattern = cm->getShardKeyPattern().getKeyPattern();
-
const auto collInfoStatus = createCollectionDistributionStatus(opCtx, shardStats, cm);
if (!collInfoStatus.isOK()) {
return collInfoStatus.getStatus();
@@ -389,28 +497,20 @@ StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::_getSplitCandidate
// Accumulate split points for the same chunk together
SplitCandidatesBuffer splitCandidates(nss, cm->getVersion());
- for (const auto& tagRangeEntry : distribution.tagRanges()) {
- const auto& tagRange = tagRangeEntry.second;
-
- const auto chunkAtZoneMin = cm->findIntersectingChunkWithSimpleCollation(tagRange.min);
- invariant(chunkAtZoneMin->getMax().woCompare(tagRange.min) > 0);
-
- if (chunkAtZoneMin->getMin().woCompare(tagRange.min)) {
- splitCandidates.addSplitPoint(chunkAtZoneMin, tagRange.min);
+ if (nss.ns() == "config.system.sessions") {
+ if (!distribution.tags().empty()) {
+ str::stream builder;
+ builder << "Ignoring zones for the sessions collection: ";
+ for (const auto& tag : distribution.tags()) {
+ builder << tag << ", ";
+ }
+ const std::string msg = builder;
+ warning() << msg;
}
- // The global max key can never fall in the middle of a chunk
- if (!tagRange.max.woCompare(shardKeyPattern.globalMax()))
- continue;
-
- const auto chunkAtZoneMax = cm->findIntersectingChunkWithSimpleCollation(tagRange.max);
-
- // We need to check that both the chunk's minKey does not match the zone's max and also that
- // the max is not equal, which would only happen in the case of the zone ending in MaxKey.
- if (chunkAtZoneMax->getMin().woCompare(tagRange.max) &&
- chunkAtZoneMax->getMax().woCompare(tagRange.max)) {
- splitCandidates.addSplitPoint(chunkAtZoneMax, tagRange.max);
- }
+ getSplitCandidatesForSessionsCollection(opCtx, cm, &splitCandidates);
+ } else {
+ getSplitCandidatesToEnforceTagRanges(cm, distribution, &splitCandidates);
}
return splitCandidates.done();
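The split-point loop in getSplitCandidatesForSessionsCollection enumerates the top numBits bits of the 128-bit _id.id UUID and leaves every remaining bit zero, so consecutive split points carve the UUID space into numChunks equal ranges. A standalone sketch of the same computation for the default of 1024 chunks (illustrative only, in mongo-shell JavaScript; not part of the patch):

```javascript
// Standalone sketch (not part of the patch): the UUID prefixes the balancer would use
// as split points for the default minNumChunksForSessionsCollection = 1024.
const minNumChunks = 1024;
const numBits = 32 - Math.clz32(minNumChunks - 1);  // 10
const numChunks = 1 << numBits;                     // 1024 (already a power of 2)

const splitPoints = [];
for (let i = 1; i < numChunks; i++) {
    // Shift i into the top numBits of a 32-bit word; the remaining 96 bits of the
    // UUID stay zero, so only the first 4 bytes of each split point vary.
    const high = (i << (32 - numBits)) >>> 0;
    splitPoints.push(("00000000" + high.toString(16)).slice(-8) + "0".repeat(24));
}

print(splitPoints.length);  // 1023 split points -> 1024 chunks
print(splitPoints[0]);      // 00400000000000000000000000000000
print(splitPoints[511]);    // 80000000000000000000000000000000
```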
diff --git a/src/mongo/shell/servers.js b/src/mongo/shell/servers.js
index e6fb65ecc12..3f613f90f95 100644
--- a/src/mongo/shell/servers.js
+++ b/src/mongo/shell/servers.js
@@ -999,6 +999,14 @@ var MongoRunner, _startMongod, startMongoProgram, runMongoProgram, startMongoPro
.length > 0);
}
+ function argArrayContainsSetParameterValue(value) {
+ assert(value.endsWith("="),
+ "Expected value argument to be of the form <parameterName>=");
+ return argArray.some(function(el) {
+ return typeof el === "string" && el.startsWith(value);
+ });
+ }
+
// programName includes the version, e.g., mongod-3.2.
// baseProgramName is the program name without any version information, e.g., mongod.
let programName = argArray[0];
@@ -1074,6 +1082,16 @@ var MongoRunner, _startMongod, startMongoProgram, runMongoProgram, startMongoPro
}
}
+ // New mongod-specific options in 3.6.x
+ if (!programMajorMinorVersion || programMajorMinorVersion >= 360) {
+ if ((jsTest.options().setParameters === undefined ||
+ jsTest.options().setParameters['minNumChunksForSessionsCollection'] ===
+ undefined) &&
+ !argArrayContainsSetParameterValue('minNumChunksForSessionsCollection=')) {
+ argArray.push(...['--setParameter', "minNumChunksForSessionsCollection=1"]);
+ }
+ }
+
// TODO: Make this unconditional in 3.8.
if (!programMajorMinorVersion || programMajorMinorVersion > 304) {
let hasParam = false;
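The net effect of the servers.js change: any mongod at or above the version gate that the test shell starts gets --setParameter minNumChunksForSessionsCollection=1 appended, unless a suite or an explicit argument already sets the parameter, so ordinary tests do not pay for splitting config.system.sessions into the default 1024 chunks. A small sketch of the prefix check the new helper performs (illustrative, not part of the patch):

```javascript
// Illustrative sketch of the prefix check argArrayContainsSetParameterValue performs
// over the raw argv strings that MongoRunner assembles.
const argArray = ["mongod", "--setParameter", "minNumChunksForSessionsCollection=100"];

const containsSetParameterValue =
    (value) => argArray.some(el => typeof el === "string" && el.startsWith(value));

print(containsSetParameterValue("minNumChunksForSessionsCollection="));  // true
// If this were false (and jsTest.options().setParameters did not set the parameter),
// MongoRunner would append: --setParameter minNumChunksForSessionsCollection=1
```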