/** * Copyright (C) 2018-present MongoDB, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the Server Side Public License, version 1, * as published by MongoDB, Inc. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * Server Side Public License for more details. * * You should have received a copy of the Server Side Public License * along with this program. If not, see * . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the Server Side Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding #include "mongo/platform/basic.h" #include "mongo/db/s/split_vector.h" #include "mongo/base/status_with.h" #include "mongo/db/bson/dotted_path_support.h" #include "mongo/db/catalog/index_catalog.h" #include "mongo/db/catalog_raii.h" #include "mongo/db/dbhelpers.h" #include "mongo/db/exec/working_set_common.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/keypattern.h" #include "mongo/db/namespace_string.h" #include "mongo/db/query/internal_plans.h" #include "mongo/db/query/plan_executor.h" #include "mongo/util/log.h" namespace mongo { namespace { const int kMaxObjectPerChunk{250000}; const int estimatedAdditionalBytesPerItemInBSONArray{2}; BSONObj prettyKey(const BSONObj& keyPattern, const BSONObj& key) { return key.replaceFieldNames(keyPattern).clientReadable(); } } // namespace StatusWith> splitVector(OperationContext* opCtx, const NamespaceString& nss, const BSONObj& keyPattern, const BSONObj& min, const BSONObj& max, bool force, boost::optional maxSplitPoints, boost::optional maxChunkObjects, boost::optional maxChunkSize, boost::optional maxChunkSizeBytes) { std::vector splitKeys; std::size_t splitVectorResponseSize = 0; // Always have a default value for maxChunkObjects if (!maxChunkObjects) { maxChunkObjects = kMaxObjectPerChunk; } { AutoGetCollection autoColl(opCtx, nss, MODE_IS); Collection* const collection = autoColl.getCollection(); if (!collection) { return {ErrorCodes::NamespaceNotFound, "ns not found"}; } // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore, // any multi-key index prefixed by shard key cannot be multikey over the shard key fields. const IndexDescriptor* idx = collection->getIndexCatalog()->findShardKeyPrefixedIndex(opCtx, keyPattern, false); if (idx == nullptr) { return {ErrorCodes::IndexNotFound, "couldn't find index over splitting key " + keyPattern.clientReadable().toString()}; } // extend min to get (min, MinKey, MinKey, ....) KeyPattern kp(idx->keyPattern()); BSONObj minKey = Helpers::toKeyFormat(kp.extendRangeBound(min, false)); BSONObj maxKey; if (max.isEmpty()) { // if max not specified, make it (MaxKey, Maxkey, MaxKey...) maxKey = Helpers::toKeyFormat(kp.extendRangeBound(max, true)); } else { // otherwise make it (max,MinKey,MinKey...) so that bound is non-inclusive maxKey = Helpers::toKeyFormat(kp.extendRangeBound(max, false)); } // Get the size estimate for this namespace const long long recCount = collection->numRecords(opCtx); const long long dataSize = collection->dataSize(opCtx); // Now that we have the size estimate, go over the remaining parameters and apply any // maximum size restrictions specified there. // Forcing a split is equivalent to having maxChunkSize be the size of the current // chunk, i.e., the logic below will split that chunk in half if (force) { maxChunkSize = dataSize; } else if (!maxChunkSize) { if (maxChunkSizeBytes) { maxChunkSize = maxChunkSizeBytes.get(); } } else { maxChunkSize = maxChunkSize.get() * 1 << 20; } // We need a maximum size for the chunk. if (!maxChunkSize || maxChunkSize.get() <= 0) { return {ErrorCodes::InvalidOptions, "need to specify the desired max chunk size"}; } // If there's not enough data for more than one chunk, no point continuing. if (dataSize < maxChunkSize.get() || recCount == 0) { std::vector emptyVector; return emptyVector; } log() << "request split points lookup for chunk " << nss.toString() << " " << redact(minKey) << " -->> " << redact(maxKey); // We'll use the average object size and number of object to find approximately how many // keys each chunk should have. We'll split at half the maxChunkSize or maxChunkObjects, // if provided. const long long avgRecSize = dataSize / recCount; long long keyCount = maxChunkSize.get() / (2 * avgRecSize); if (maxChunkObjects.get() && (maxChunkObjects.get() < keyCount)) { log() << "limiting split vector to " << maxChunkObjects.get() << " (from " << keyCount << ") objects "; keyCount = maxChunkObjects.get(); } // // Traverse the index and add the keyCount-th key to the result vector. If that key // appeared in the vector before, we omit it. The invariant here is that all the // instances of a given key value live in the same chunk. // Timer timer; long long currCount = 0; long long numChunks = 0; auto exec = InternalPlanner::indexScan(opCtx, collection, idx, minKey, maxKey, BoundInclusion::kIncludeStartKeyOnly, PlanExecutor::YIELD_AUTO, InternalPlanner::FORWARD); BSONObj currKey; PlanExecutor::ExecState state = exec->getNext(&currKey, nullptr); if (PlanExecutor::ADVANCED != state) { return {ErrorCodes::OperationFailed, "can't open a cursor to scan the range (desired range is possibly empty)"}; } // Get the final key in the range, and see if it's the same as the first key. BSONObj maxKeyInChunk; { auto exec = InternalPlanner::indexScan(opCtx, collection, idx, maxKey, minKey, BoundInclusion::kIncludeEndKeyOnly, PlanExecutor::YIELD_AUTO, InternalPlanner::BACKWARD); PlanExecutor::ExecState state = exec->getNext(&maxKeyInChunk, nullptr); if (PlanExecutor::ADVANCED != state) { return {ErrorCodes::OperationFailed, "can't open a cursor to find final key in range (desired range is possibly " "empty)"}; } } if (currKey.woCompare(maxKeyInChunk) == 0) { // Range contains only documents with a single key value. So we cannot possibly find a // split point, and there is no need to scan any further. warning() << "possible low cardinality key detected in " << nss.toString() << " - range " << redact(minKey) << " -->> " << redact(maxKey) << " contains only the key " << redact(prettyKey(idx->keyPattern(), currKey)); std::vector emptyVector; return emptyVector; } // Use every 'keyCount'-th key as a split point. We add the initial key as a sentinel, // to be removed at the end. If a key appears more times than entries allowed on a // chunk, we issue a warning and split on the following key. auto tooFrequentKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); splitKeys.push_back(dotted_path_support::extractElementsBasedOnTemplate( prettyKey(idx->keyPattern(), currKey.getOwned()), keyPattern)); while (1) { while (PlanExecutor::ADVANCED == state) { currCount++; if (currCount > keyCount && !force) { currKey = dotted_path_support::extractElementsBasedOnTemplate( prettyKey(idx->keyPattern(), currKey.getOwned()), keyPattern); // Do not use this split key if it is the same used in the previous split // point. if (currKey.woCompare(splitKeys.back()) == 0) { tooFrequentKeys.insert(currKey.getOwned()); } else { auto additionalKeySize = currKey.objsize() + estimatedAdditionalBytesPerItemInBSONArray; if (splitVectorResponseSize + additionalKeySize > BSONObjMaxUserSize) { if (splitKeys.empty()) { // Keep trying until we get at least one split point that isn't // above the max object user size. state = exec->getNext(&currKey, nullptr); continue; } log() << "Max BSON response size reached for split vector before the" << " end of chunk " << nss.toString() << " " << redact(minKey) << " -->> " << redact(maxKey); break; } splitVectorResponseSize += additionalKeySize; splitKeys.push_back(currKey.getOwned()); currCount = 0; numChunks++; LOG(4) << "picked a split key: " << redact(currKey); } } // Stop if we have enough split points. if (maxSplitPoints && maxSplitPoints.get() && (numChunks >= maxSplitPoints.get())) { log() << "max number of requested split points reached (" << numChunks << ") before the end of chunk " << nss.toString() << " " << redact(minKey) << " -->> " << redact(maxKey); break; } state = exec->getNext(&currKey, nullptr); } if (PlanExecutor::FAILURE == state) { return WorkingSetCommon::getMemberObjectStatus(currKey).withContext( "Executor error during splitVector command"); } if (!force) break; // // If we're forcing a split at the halfway point, then the first pass was just // to count the keys, and we still need a second pass. // force = false; keyCount = currCount / 2; currCount = 0; log() << "splitVector doing another cycle because of force, keyCount now: " << keyCount; exec = InternalPlanner::indexScan(opCtx, collection, idx, minKey, maxKey, BoundInclusion::kIncludeStartKeyOnly, PlanExecutor::YIELD_AUTO, InternalPlanner::FORWARD); state = exec->getNext(&currKey, nullptr); } // // Format the result and issue any warnings about the data we gathered while traversing the // index // // Warn for keys that are more numerous than maxChunkSize allows. for (auto it = tooFrequentKeys.cbegin(); it != tooFrequentKeys.cend(); ++it) { warning() << "possible low cardinality key detected in " << nss.toString() << " - key is " << redact(prettyKey(idx->keyPattern(), *it)); } // Remove the sentinel at the beginning before returning splitKeys.erase(splitKeys.begin()); if (timer.millis() > serverGlobalParams.slowMS) { warning() << "Finding the split vector for " << nss.toString() << " over " << redact(keyPattern) << " keyCount: " << keyCount << " numSplits: " << splitKeys.size() << " lookedAt: " << currCount << " took " << timer.millis() << "ms"; } } // Make sure splitKeys is in ascending order std::sort( splitKeys.begin(), splitKeys.end(), SimpleBSONObjComparator::kInstance.makeLessThan()); return splitKeys; } } // namespace mongo