summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPierlauro Sciarelli <pierlauro.sciarelli@mongodb.com>2021-07-21 13:45:20 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-07-22 09:07:48 +0000
commit1f4dd25739c182d7528847dd3303598a751d1390 (patch)
tree9dbeb280c8bbbcb4c8af1cd6e30c9d688c8b3061
parent57c5085d7524540fed7aa83d5e3f149d27c355fd (diff)
downloadmongo-1f4dd25739c182d7528847dd3303598a751d1390.tar.gz
SERVER-58664 Simplify and comment autoSplitVector
-rw-r--r--src/mongo/db/s/auto_split_vector.cpp319
1 file changed, 170 insertions, 149 deletions
diff --git a/src/mongo/db/s/auto_split_vector.cpp b/src/mongo/db/s/auto_split_vector.cpp
index 4fad7f80a84..81933ad8ac7 100644
--- a/src/mongo/db/s/auto_split_vector.cpp
+++ b/src/mongo/db/s/auto_split_vector.cpp
@@ -49,13 +49,78 @@
namespace mongo {
namespace {
-const int kMaxObjectPerChunk{250000};
-const int estimatedAdditionalBytesPerItemInBSONArray{2};
+constexpr int estimatedAdditionalBytesPerItemInBSONArray{2};
BSONObj prettyKey(const BSONObj& keyPattern, const BSONObj& key) {
return key.replaceFieldNames(keyPattern).clientReadable();
}
+/*
+ * Takes the given min/max BSON objects that are a prefix of the shardKey and returns two new BSON
+ * objects extended to cover the entire shardKey. See KeyPattern::extendRangeBound documentation for
+ * some examples.
+ */
+const std::tuple<BSONObj, BSONObj> getMinMaxExtendedBounds(const IndexDescriptor* shardKeyIdx,
+ const BSONObj& min,
+ const BSONObj& max) {
+ KeyPattern kp(shardKeyIdx->keyPattern());
+
+ // Extend min to get (min, MinKey, MinKey, ....)
+ BSONObj minKey = Helpers::toKeyFormat(kp.extendRangeBound(min, false /* upperInclusive */));
+ BSONObj maxKey;
+ if (max.isEmpty()) {
+ // if max not specified, make it (MaxKey, MaxKey, MaxKey...)
+ maxKey = Helpers::toKeyFormat(kp.extendRangeBound(max, true /* upperInclusive */));
+ } else {
+ // otherwise make it (max, MinKey, MinKey...) so that bound is non-inclusive
+ maxKey = Helpers::toKeyFormat(kp.extendRangeBound(max, false /* upperInclusive */));
+ }
+
+ return {minKey, maxKey};
+}
+
+/*
+ * Returns true if the final key in the range is the same as the first key, false otherwise.
+ */
+bool maxKeyEqualToMinKey(OperationContext* opCtx,
+ const CollectionPtr* collection,
+ const IndexDescriptor* shardKeyIdx,
+ const BSONObj& minBound,
+ const BSONObj& maxBound,
+ const BSONObj& minKeyInChunk) {
+ BSONObj maxKeyInChunk;
+ {
+ auto exec = InternalPlanner::indexScan(opCtx,
+ collection,
+ shardKeyIdx,
+ maxBound,
+ minBound,
+ BoundInclusion::kIncludeEndKeyOnly,
+ PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
+ InternalPlanner::BACKWARD);
+
+ PlanExecutor::ExecState state = exec->getNext(&maxKeyInChunk, nullptr);
+ uassert(ErrorCodes::OperationFailed,
+ "can't open a cursor to find final key in range (desired range is possibly empty)",
+ state == PlanExecutor::ADVANCED);
+ }
+
+ if (minKeyInChunk.woCompare(maxKeyInChunk) == 0) {
+ // Range contains only documents with a single key value. So we cannot possibly find a
+ // split point, and there is no need to scan any further.
+ LOGV2_WARNING(
+ 5865001,
+ "Possible low cardinality key detected in range. Range contains only a single key.",
+ "namespace"_attr = collection->get()->ns(),
+ "minKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minBound)),
+ "maxKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), maxBound)),
+ "key"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minKeyInChunk)));
+ return true;
+ }
+
+ return false;
+}
+
} // namespace
std::vector<BSONObj> autoSplitVector(OperationContext* opCtx,
@@ -65,13 +130,26 @@ std::vector<BSONObj> autoSplitVector(OperationContext* opCtx,
const BSONObj& max,
long long maxChunkSizeBytes) {
std::vector<BSONObj> splitKeys;
- std::size_t splitVectorResponseSize = 0;
+
+ int elapsedMillisToFindSplitPoints;
+
+ // Contains each key that appears multiple times and is estimated to be able to fill a chunk on its own
+ auto tooFrequentKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
{
AutoGetCollection collection(opCtx, nss, MODE_IS);
uassert(ErrorCodes::NamespaceNotFound, "ns not found", collection);
+ // Get the size estimate for this namespace
+ const long long totalLocalCollDocuments = collection->numRecords(opCtx);
+ const long long dataSize = collection->dataSize(opCtx);
+
+ // Return empty vector if current estimated data size is less than max chunk size
+ if (dataSize < maxChunkSizeBytes || totalLocalCollDocuments == 0) {
+ return {};
+ }
+
// Allow multiKey based on the invariant that shard keys must be single-valued. Therefore,
// any multi-key index prefixed by shard key cannot be multikey over the shard key fields.
auto catalog = collection->getIndexCatalog();
@@ -82,178 +160,121 @@ std::vector<BSONObj> autoSplitVector(OperationContext* opCtx,
<< keyPattern.clientReadable().toString(),
shardKeyIdx);
- // extend min to get (min, MinKey, MinKey, ....)
- KeyPattern kp(shardKeyIdx->keyPattern());
- BSONObj minKey = Helpers::toKeyFormat(kp.extendRangeBound(min, false));
- BSONObj maxKey;
- if (max.isEmpty()) {
- // if max not specified, make it (MaxKey, Maxkey, MaxKey...)
- maxKey = Helpers::toKeyFormat(kp.extendRangeBound(max, true));
- } else {
- // otherwise make it (max,MinKey,MinKey...) so that bound is non-inclusive
- maxKey = Helpers::toKeyFormat(kp.extendRangeBound(max, false));
- }
+ const auto [minKey, maxKey] = getMinMaxExtendedBounds(shardKeyIdx, min, max);
- // Get the size estimate for this namespace
- const long long recCount = collection->numRecords(opCtx);
- const long long dataSize = collection->dataSize(opCtx);
+ // Setup the index scanner that will be used to find the split points
+ auto exec = InternalPlanner::indexScan(opCtx,
+ &(*collection),
+ shardKeyIdx,
+ minKey,
+ maxKey,
+ BoundInclusion::kIncludeStartKeyOnly,
+ PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
+ InternalPlanner::FORWARD);
- // Now that we have the size estimate, go over the remaining parameters and apply any
- // maximum size restrictions specified there.
+ // Get minimum key belonging to the chunk
+ BSONObj minKeyInOriginalChunk;
+ {
+ PlanExecutor::ExecState state = exec->getNext(&minKeyInOriginalChunk, nullptr);
+ uassert(ErrorCodes::OperationFailed,
+ "can't open a cursor to scan the range (desired range is possibly empty)",
+ state == PlanExecutor::ADVANCED);
+ }
- // If there's not enough data for more than one chunk, no point continuing.
- if (dataSize < maxChunkSizeBytes || recCount == 0) {
+ // Return empty vector if chunk's min and max keys are the same.
+ if (maxKeyEqualToMinKey(opCtx,
+ &collection.getCollection(),
+ shardKeyIdx,
+ minKey,
+ maxKey,
+ minKeyInOriginalChunk)) {
return {};
}
LOGV2(5865000,
"Requested split points lookup for chunk",
- "namespace"_attr = nss.toString(),
+ "namespace"_attr = nss,
"minKey"_attr = redact(prettyKey(keyPattern, minKey)),
"maxKey"_attr = redact(prettyKey(keyPattern, maxKey)));
- // We'll use the average object size and number of object to find approximately how many
- // keys each chunk should have. We'll split at half the kMaxObjectPerChunk.
- const long long avgRecSize = dataSize / recCount;
+ // Use the average document size and number of documents to find the approximate number of
+ // keys each chunk should contain
+ const long long avgDocSize = dataSize / totalLocalCollDocuments;
- long long keyCount = maxChunkSizeBytes / (2 * avgRecSize);
+ // Split at half the max chunk size
+ long long maxDocsPerSplittedChunk = maxChunkSizeBytes / (2 * avgDocSize);
- //
- // Traverse the index and add the keyCount-th key to the result vector. If that key
- // appeared in the vector before, we omit it. The invariant here is that all the
- // instances of a given key value live in the same chunk.
- //
+ BSONObj currentKey; // Last key seen during the index scan
+ long long numScannedKeys = 1; // minKeyInOriginalChunk has already been scanned
+ std::size_t resultArraySize = 0; // Approximate size in bytes of the split points array
- Timer timer;
- long long currCount = 0;
- long long numChunks = 0;
+ // Reference to last split point that needs to be checked in order to avoid adding duplicate
+ // split points. Initialized to the min of the first chunk being split.
+ auto lastSplitPoint = dotted_path_support::extractElementsBasedOnTemplate(
+ prettyKey(shardKeyIdx->keyPattern(), minKeyInOriginalChunk.getOwned()), keyPattern);
- auto exec = InternalPlanner::indexScan(opCtx,
- &collection.getCollection(),
- shardKeyIdx,
- minKey,
- maxKey,
- BoundInclusion::kIncludeStartKeyOnly,
- PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
- InternalPlanner::FORWARD);
+ Timer timer; // To measure time elapsed while searching split points
- BSONObj currKey;
- PlanExecutor::ExecState state = exec->getNext(&currKey, nullptr);
- uassert(ErrorCodes::OperationFailed,
- "can't open a cursor to scan the range (desired range is possibly empty)",
- state == PlanExecutor::ADVANCED);
+ // Traverse the index and add the maxDocsPerSplittedChunk-th key to the result vector
+ while (exec->getNext(&currentKey, nullptr) == PlanExecutor::ADVANCED) {
+ numScannedKeys++;
- // Get the final key in the range, and see if it's the same as the first key.
- BSONObj maxKeyInChunk;
- {
- auto exec = InternalPlanner::indexScan(opCtx,
- &collection.getCollection(),
- shardKeyIdx,
- maxKey,
- minKey,
- BoundInclusion::kIncludeEndKeyOnly,
- PlanYieldPolicy::YieldPolicy::YIELD_AUTO,
- InternalPlanner::BACKWARD);
-
- PlanExecutor::ExecState state = exec->getNext(&maxKeyInChunk, nullptr);
- uassert(
- ErrorCodes::OperationFailed,
- "can't open a cursor to find final key in range (desired range is possibly empty)",
- state == PlanExecutor::ADVANCED);
- }
+ if (numScannedKeys > maxDocsPerSplittedChunk) {
+ currentKey = dotted_path_support::extractElementsBasedOnTemplate(
+ prettyKey(shardKeyIdx->keyPattern(), currentKey.getOwned()), keyPattern);
- if (currKey.woCompare(maxKeyInChunk) == 0) {
- // Range contains only documents with a single key value. So we cannot possibly find a
- // split point, and there is no need to scan any further.
- LOGV2_WARNING(
- 5865001,
- "Possible low cardinality key detected in range. Range contains only a single key.",
- "namespace"_attr = nss.toString(),
- "minKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minKey)),
- "maxKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), maxKey)),
- "key"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), currKey)));
- return {};
- }
+ if (currentKey.woCompare(lastSplitPoint) == 0) {
+ // Do not add again the same split point in case of frequent shard key.
+ tooFrequentKeys.insert(currentKey.getOwned());
+ continue;
+ }
- // Use every 'keyCount'-th key as a split point. We add the initial key as a sentinel,
- // to be removed at the end. If a key appears more times than entries allowed on a
- // chunk, we issue a warning and split on the following key.
- auto tooFrequentKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
- splitKeys.push_back(dotted_path_support::extractElementsBasedOnTemplate(
- prettyKey(shardKeyIdx->keyPattern(), currKey.getOwned()), keyPattern));
-
- while (PlanExecutor::ADVANCED == state) {
- currCount++;
-
- if (currCount > keyCount) {
- currKey = dotted_path_support::extractElementsBasedOnTemplate(
- prettyKey(shardKeyIdx->keyPattern(), currKey.getOwned()), keyPattern);
-
- // Do not use this split key if it is the same used in the previous split
- // point.
- if (currKey.woCompare(splitKeys.back()) == 0) {
- tooFrequentKeys.insert(currKey.getOwned());
- } else {
- auto additionalKeySize =
- currKey.objsize() + estimatedAdditionalBytesPerItemInBSONArray;
- if (splitVectorResponseSize + additionalKeySize > BSONObjMaxUserSize) {
- if (splitKeys.empty()) {
- // Keep trying until we get at least one split point that isn't
- // above the max object user size.
- state = exec->getNext(&currKey, nullptr);
- continue;
- }
-
- LOGV2(5865002,
- "Max BSON response size reached for split vector before the end "
- "of chunk",
- "namespace"_attr = nss.toString(),
- "minKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minKey)),
- "maxKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), maxKey)));
- break;
+ const auto additionalKeySize =
+ currentKey.objsize() + estimatedAdditionalBytesPerItemInBSONArray;
+ if (resultArraySize + additionalKeySize > BSONObjMaxUserSize) {
+ if (splitKeys.empty()) {
+ // Keep trying until finding at least one split point that isn't above
+ // the max object user size. Very improbable corner case: the shard key
+ // size for the chosen split point is exactly 16MB.
+ continue;
}
- splitVectorResponseSize += additionalKeySize;
- splitKeys.push_back(currKey.getOwned());
- currCount = 0;
- numChunks++;
- LOGV2_DEBUG(5865003,
- 4,
- "Picked a split key: {key}",
- "Picked a split key",
- "key"_attr = redact(currKey));
+ LOGV2(5865002,
+ "Max BSON response size reached for split vector before the end "
+ "of chunk",
+ "namespace"_attr = nss,
+ "minKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minKey)),
+ "maxKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), maxKey)));
+ break;
}
- }
- state = exec->getNext(&currKey, nullptr);
- }
+ resultArraySize += additionalKeySize;
+ splitKeys.push_back(currentKey.getOwned());
+ lastSplitPoint = splitKeys.back();
+ numScannedKeys = 0;
- //
- // Format the result and issue any warnings about the data we gathered while traversing the
- // index
- //
-
- // Warn for keys that are more numerous than maxChunkSizeBytes allows.
- for (auto it = tooFrequentKeys.cbegin(); it != tooFrequentKeys.cend(); ++it) {
- LOGV2_WARNING(5865004,
- "Possible low cardinality key detected",
- "namespace"_attr = nss.toString(),
- "key"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), *it)));
+ LOGV2_DEBUG(5865003, 4, "Picked a split key", "key"_attr = redact(currentKey));
+ }
}
- // Remove the sentinel at the beginning before returning
- splitKeys.erase(splitKeys.begin());
-
- if (timer.millis() > serverGlobalParams.slowMS) {
- LOGV2_WARNING(5865005,
- "Finding the split vector completed",
- "namespace"_attr = nss.toString(),
- "keyPattern"_attr = redact(keyPattern),
- "keyCount"_attr = keyCount,
- "numSplits"_attr = splitKeys.size(),
- "currCount"_attr = currCount,
- "duration"_attr = Milliseconds(timer.millis()));
- }
+ elapsedMillisToFindSplitPoints = timer.millis();
+ }
+
+ // Emit a warning for each frequent key
+ for (const auto& frequentKey : tooFrequentKeys) {
+ LOGV2_WARNING(5865004,
+ "Possible low cardinality key detected",
+ "namespace"_attr = nss,
+ "key"_attr = redact(prettyKey(keyPattern, frequentKey)));
+ }
+
+ if (elapsedMillisToFindSplitPoints > serverGlobalParams.slowMS) {
+ LOGV2_WARNING(5865005,
+ "Finding the auto split vector completed",
+ "namespace"_attr = nss,
+ "keyPattern"_attr = redact(keyPattern),
+ "numSplits"_attr = splitKeys.size(),
+ "duration"_attr = Milliseconds(elapsedMillisToFindSplitPoints));
}
// Make sure splitKeys is in ascending order