diff options
author | Pierlauro Sciarelli <pierlauro.sciarelli@mongodb.com> | 2021-07-21 13:45:20 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-07-22 09:07:48 +0000 |
commit | 1f4dd25739c182d7528847dd3303598a751d1390 (patch) | |
tree | 9dbeb280c8bbbcb4c8af1cd6e30c9d688c8b3061 | |
parent | 57c5085d7524540fed7aa83d5e3f149d27c355fd (diff) | |
download | mongo-1f4dd25739c182d7528847dd3303598a751d1390.tar.gz |
SERVER-58664 Simplify and comment autoSplitVector
-rw-r--r-- | src/mongo/db/s/auto_split_vector.cpp | 319 |
1 file changed, 170 insertions, 149 deletions
diff --git a/src/mongo/db/s/auto_split_vector.cpp b/src/mongo/db/s/auto_split_vector.cpp index 4fad7f80a84..81933ad8ac7 100644 --- a/src/mongo/db/s/auto_split_vector.cpp +++ b/src/mongo/db/s/auto_split_vector.cpp @@ -49,13 +49,78 @@ namespace mongo { namespace { -const int kMaxObjectPerChunk{250000}; -const int estimatedAdditionalBytesPerItemInBSONArray{2}; +constexpr int estimatedAdditionalBytesPerItemInBSONArray{2}; BSONObj prettyKey(const BSONObj& keyPattern, const BSONObj& key) { return key.replaceFieldNames(keyPattern).clientReadable(); } +/* + * Takes the given min/max BSON objects that are a prefix of the shardKey and return two new BSON + * object extended to cover the entire shardKey. See KeyPattern::extendRangeBound documentation for + * some examples. + */ +const std::tuple<BSONObj, BSONObj> getMinMaxExtendedBounds(const IndexDescriptor* shardKeyIdx, + const BSONObj& min, + const BSONObj& max) { + KeyPattern kp(shardKeyIdx->keyPattern()); + + // Extend min to get (min, MinKey, MinKey, ....) + BSONObj minKey = Helpers::toKeyFormat(kp.extendRangeBound(min, false /* upperInclusive */)); + BSONObj maxKey; + if (max.isEmpty()) { + // if max not specified, make it (MaxKey, Maxkey, MaxKey...) + maxKey = Helpers::toKeyFormat(kp.extendRangeBound(max, true /* upperInclusive */)); + } else { + // otherwise make it (max,MinKey,MinKey...) so that bound is non-inclusive + maxKey = Helpers::toKeyFormat(kp.extendRangeBound(max, false /* upperInclusive*/)); + } + + return {minKey, maxKey}; +} + +/* + * Returns true if the final key in the range is the same as the first key, false otherwise. 
+ */ +bool maxKeyEqualToMinKey(OperationContext* opCtx, + const CollectionPtr* collection, + const IndexDescriptor* shardKeyIdx, + const BSONObj& minBound, + const BSONObj& maxBound, + const BSONObj& minKeyInChunk) { + BSONObj maxKeyInChunk; + { + auto exec = InternalPlanner::indexScan(opCtx, + collection, + shardKeyIdx, + maxBound, + minBound, + BoundInclusion::kIncludeEndKeyOnly, + PlanYieldPolicy::YieldPolicy::YIELD_AUTO, + InternalPlanner::BACKWARD); + + PlanExecutor::ExecState state = exec->getNext(&maxKeyInChunk, nullptr); + uassert(ErrorCodes::OperationFailed, + "can't open a cursor to find final key in range (desired range is possibly empty)", + state == PlanExecutor::ADVANCED); + } + + if (minKeyInChunk.woCompare(maxKeyInChunk) == 0) { + // Range contains only documents with a single key value. So we cannot possibly find a + // split point, and there is no need to scan any further. + LOGV2_WARNING( + 5865001, + "Possible low cardinality key detected in range. Range contains only a single key.", + "namespace"_attr = collection->get()->ns(), + "minKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minBound)), + "maxKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), maxBound)), + "key"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minKeyInChunk))); + return true; + } + + return false; +} + } // namespace std::vector<BSONObj> autoSplitVector(OperationContext* opCtx, @@ -65,13 +130,26 @@ std::vector<BSONObj> autoSplitVector(OperationContext* opCtx, const BSONObj& max, long long maxChunkSizeBytes) { std::vector<BSONObj> splitKeys; - std::size_t splitVectorResponseSize = 0; + + int elapsedMillisToFindSplitPoints; + + // Contains each key appearing multiple times and estimated to be able to fill-in a chunk alone + auto tooFrequentKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); { AutoGetCollection collection(opCtx, nss, MODE_IS); uassert(ErrorCodes::NamespaceNotFound, "ns not found", collection); + // Get the size estimate for this 
namespace + const long long totalLocalCollDocuments = collection->numRecords(opCtx); + const long long dataSize = collection->dataSize(opCtx); + + // Return empty vector if current estimated data size is less than max chunk size + if (dataSize < maxChunkSizeBytes || totalLocalCollDocuments == 0) { + return {}; + } + // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore, // any multi-key index prefixed by shard key cannot be multikey over the shard key fields. auto catalog = collection->getIndexCatalog(); @@ -82,178 +160,121 @@ std::vector<BSONObj> autoSplitVector(OperationContext* opCtx, << keyPattern.clientReadable().toString(), shardKeyIdx); - // extend min to get (min, MinKey, MinKey, ....) - KeyPattern kp(shardKeyIdx->keyPattern()); - BSONObj minKey = Helpers::toKeyFormat(kp.extendRangeBound(min, false)); - BSONObj maxKey; - if (max.isEmpty()) { - // if max not specified, make it (MaxKey, Maxkey, MaxKey...) - maxKey = Helpers::toKeyFormat(kp.extendRangeBound(max, true)); - } else { - // otherwise make it (max,MinKey,MinKey...) so that bound is non-inclusive - maxKey = Helpers::toKeyFormat(kp.extendRangeBound(max, false)); - } + const auto [minKey, maxKey] = getMinMaxExtendedBounds(shardKeyIdx, min, max); - // Get the size estimate for this namespace - const long long recCount = collection->numRecords(opCtx); - const long long dataSize = collection->dataSize(opCtx); + // Setup the index scanner that will be used to find the split points + auto exec = InternalPlanner::indexScan(opCtx, + &(*collection), + shardKeyIdx, + minKey, + maxKey, + BoundInclusion::kIncludeStartKeyOnly, + PlanYieldPolicy::YieldPolicy::YIELD_AUTO, + InternalPlanner::FORWARD); - // Now that we have the size estimate, go over the remaining parameters and apply any - // maximum size restrictions specified there. 
+ // Get minimum key belonging to the chunk + BSONObj minKeyInOriginalChunk; + { + PlanExecutor::ExecState state = exec->getNext(&minKeyInOriginalChunk, nullptr); + uassert(ErrorCodes::OperationFailed, + "can't open a cursor to scan the range (desired range is possibly empty)", + state == PlanExecutor::ADVANCED); + } - // If there's not enough data for more than one chunk, no point continuing. - if (dataSize < maxChunkSizeBytes || recCount == 0) { + // Return empty vector if chunk's min and max keys are the same. + if (maxKeyEqualToMinKey(opCtx, + &collection.getCollection(), + shardKeyIdx, + minKey, + maxKey, + minKeyInOriginalChunk)) { return {}; } LOGV2(5865000, "Requested split points lookup for chunk", - "namespace"_attr = nss.toString(), + "namespace"_attr = nss, "minKey"_attr = redact(prettyKey(keyPattern, minKey)), "maxKey"_attr = redact(prettyKey(keyPattern, maxKey))); - // We'll use the average object size and number of object to find approximately how many - // keys each chunk should have. We'll split at half the kMaxObjectPerChunk. - const long long avgRecSize = dataSize / recCount; + // Use the average document size and number of documents to find the approximate number of + // keys each chunk should contain + const long long avgDocSize = dataSize / totalLocalCollDocuments; - long long keyCount = maxChunkSizeBytes / (2 * avgRecSize); + // Split at half the max chunk size + long long maxDocsPerSplittedChunk = maxChunkSizeBytes / (2 * avgDocSize); - // - // Traverse the index and add the keyCount-th key to the result vector. If that key - // appeared in the vector before, we omit it. The invariant here is that all the - // instances of a given key value live in the same chunk. 
- // + BSONObj currentKey; // Last key seen during the index scan + long long numScannedKeys = 1; // minKeyInOriginalChunk has already been scanned + std::size_t resultArraySize = 0; // Approximate size in bytes of the split points array - Timer timer; - long long currCount = 0; - long long numChunks = 0; + // Reference to last split point that needs to be checked in order to avoid adding duplicate + // split points. Initialized to the min of the first chunk being split. + auto lastSplitPoint = dotted_path_support::extractElementsBasedOnTemplate( + prettyKey(shardKeyIdx->keyPattern(), minKeyInOriginalChunk.getOwned()), keyPattern); - auto exec = InternalPlanner::indexScan(opCtx, - &collection.getCollection(), - shardKeyIdx, - minKey, - maxKey, - BoundInclusion::kIncludeStartKeyOnly, - PlanYieldPolicy::YieldPolicy::YIELD_AUTO, - InternalPlanner::FORWARD); + Timer timer; // To measure time elapsed while searching split points - BSONObj currKey; - PlanExecutor::ExecState state = exec->getNext(&currKey, nullptr); - uassert(ErrorCodes::OperationFailed, - "can't open a cursor to scan the range (desired range is possibly empty)", - state == PlanExecutor::ADVANCED); + // Traverse the index and add the maxDocsPerSplittedChunk-th key to the result vector + while (exec->getNext(¤tKey, nullptr) == PlanExecutor::ADVANCED) { + numScannedKeys++; - // Get the final key in the range, and see if it's the same as the first key. 
- BSONObj maxKeyInChunk; - { - auto exec = InternalPlanner::indexScan(opCtx, - &collection.getCollection(), - shardKeyIdx, - maxKey, - minKey, - BoundInclusion::kIncludeEndKeyOnly, - PlanYieldPolicy::YieldPolicy::YIELD_AUTO, - InternalPlanner::BACKWARD); - - PlanExecutor::ExecState state = exec->getNext(&maxKeyInChunk, nullptr); - uassert( - ErrorCodes::OperationFailed, - "can't open a cursor to find final key in range (desired range is possibly empty)", - state == PlanExecutor::ADVANCED); - } + if (numScannedKeys > maxDocsPerSplittedChunk) { + currentKey = dotted_path_support::extractElementsBasedOnTemplate( + prettyKey(shardKeyIdx->keyPattern(), currentKey.getOwned()), keyPattern); - if (currKey.woCompare(maxKeyInChunk) == 0) { - // Range contains only documents with a single key value. So we cannot possibly find a - // split point, and there is no need to scan any further. - LOGV2_WARNING( - 5865001, - "Possible low cardinality key detected in range. Range contains only a single key.", - "namespace"_attr = nss.toString(), - "minKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minKey)), - "maxKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), maxKey)), - "key"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), currKey))); - return {}; - } + if (currentKey.woCompare(lastSplitPoint) == 0) { + // Do not add again the same split point in case of frequent shard key. + tooFrequentKeys.insert(currentKey.getOwned()); + continue; + } - // Use every 'keyCount'-th key as a split point. We add the initial key as a sentinel, - // to be removed at the end. If a key appears more times than entries allowed on a - // chunk, we issue a warning and split on the following key. 
- auto tooFrequentKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); - splitKeys.push_back(dotted_path_support::extractElementsBasedOnTemplate( - prettyKey(shardKeyIdx->keyPattern(), currKey.getOwned()), keyPattern)); - - while (PlanExecutor::ADVANCED == state) { - currCount++; - - if (currCount > keyCount) { - currKey = dotted_path_support::extractElementsBasedOnTemplate( - prettyKey(shardKeyIdx->keyPattern(), currKey.getOwned()), keyPattern); - - // Do not use this split key if it is the same used in the previous split - // point. - if (currKey.woCompare(splitKeys.back()) == 0) { - tooFrequentKeys.insert(currKey.getOwned()); - } else { - auto additionalKeySize = - currKey.objsize() + estimatedAdditionalBytesPerItemInBSONArray; - if (splitVectorResponseSize + additionalKeySize > BSONObjMaxUserSize) { - if (splitKeys.empty()) { - // Keep trying until we get at least one split point that isn't - // above the max object user size. - state = exec->getNext(&currKey, nullptr); - continue; - } - - LOGV2(5865002, - "Max BSON response size reached for split vector before the end " - "of chunk", - "namespace"_attr = nss.toString(), - "minKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minKey)), - "maxKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), maxKey))); - break; + const auto additionalKeySize = + currentKey.objsize() + estimatedAdditionalBytesPerItemInBSONArray; + if (resultArraySize + additionalKeySize > BSONObjMaxUserSize) { + if (splitKeys.empty()) { + // Keep trying until finding at least one split point that isn't above + // the max object user size. Very improbable corner case: the shard key + // size for the chosen split point is exactly 16MB. 
+ continue; } - splitVectorResponseSize += additionalKeySize; - splitKeys.push_back(currKey.getOwned()); - currCount = 0; - numChunks++; - LOGV2_DEBUG(5865003, - 4, - "Picked a split key: {key}", - "Picked a split key", - "key"_attr = redact(currKey)); + LOGV2(5865002, + "Max BSON response size reached for split vector before the end " + "of chunk", + "namespace"_attr = nss, + "minKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), minKey)), + "maxKey"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), maxKey))); + break; } - } - state = exec->getNext(&currKey, nullptr); - } + resultArraySize += additionalKeySize; + splitKeys.push_back(currentKey.getOwned()); + lastSplitPoint = splitKeys.back(); + numScannedKeys = 0; - // - // Format the result and issue any warnings about the data we gathered while traversing the - // index - // - - // Warn for keys that are more numerous than maxChunkSizeBytes allows. - for (auto it = tooFrequentKeys.cbegin(); it != tooFrequentKeys.cend(); ++it) { - LOGV2_WARNING(5865004, - "Possible low cardinality key detected", - "namespace"_attr = nss.toString(), - "key"_attr = redact(prettyKey(shardKeyIdx->keyPattern(), *it))); + LOGV2_DEBUG(5865003, 4, "Picked a split key", "key"_attr = redact(currentKey)); + } } - // Remove the sentinel at the beginning before returning - splitKeys.erase(splitKeys.begin()); - - if (timer.millis() > serverGlobalParams.slowMS) { - LOGV2_WARNING(5865005, - "Finding the split vector completed", - "namespace"_attr = nss.toString(), - "keyPattern"_attr = redact(keyPattern), - "keyCount"_attr = keyCount, - "numSplits"_attr = splitKeys.size(), - "currCount"_attr = currCount, - "duration"_attr = Milliseconds(timer.millis())); - } + elapsedMillisToFindSplitPoints = timer.millis(); + } + + // Emit a warning for each frequent key + for (const auto& frequentKey : tooFrequentKeys) { + LOGV2_WARNING(5865004, + "Possible low cardinality key detected", + "namespace"_attr = nss, + "key"_attr = 
redact(prettyKey(keyPattern, frequentKey))); + } + + if (elapsedMillisToFindSplitPoints > serverGlobalParams.slowMS) { + LOGV2_WARNING(5865005, + "Finding the auto split vector completed", + "namespace"_attr = nss, + "keyPattern"_attr = redact(keyPattern), + "numSplits"_attr = splitKeys.size(), + "duration"_attr = Milliseconds(elapsedMillisToFindSplitPoints)); } // Make sure splitKeys is in ascending order |