/**
* Copyright (C) 2013 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects
* for all of the code used other than as permitted herein. If you modify
* file(s) with this exception, you may extend this exception to your
* version of the file(s), but you are not obligated to do so. If you do not
* wish to do so, delete this exception statement from your version. If you
* delete this exception statement from all source files in the program,
* then also delete it in the license file.
*/
#include "mongo/platform/basic.h"
#include "mongo/s/shard_key_pattern.h"
#include
#include "mongo/db/field_ref.h"
#include "mongo/db/field_ref_set.h"
#include "mongo/db/hasher.h"
#include "mongo/db/index_names.h"
#include "mongo/db/matcher/extensions_callback_noop.h"
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/update/path_support.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/transitional_tools_do_not_use/vector_spooling.h"
namespace mongo {
using std::shared_ptr;
using std::string;
using std::unique_ptr;
using std::vector;
using pathsupport::EqualityMatches;
const int ShardKeyPattern::kMaxShardKeySizeBytes = 512;
const unsigned int ShardKeyPattern::kMaxFlattenedInCombinations = 4000000;
namespace {
bool isHashedPatternEl(const BSONElement& el) {
return el.type() == String && el.String() == IndexNames::HASHED;
}
/**
* Currently the allowable shard keys are either:
* i) a hashed single field, e.g. { a : "hashed" }, or
* ii) a compound list of ascending, potentially-nested field paths, e.g. { a : 1 , b.c : 1 }
*/
std::vector> parseShardKeyPattern(const BSONObj& keyPattern) {
std::vector> parsedPaths;
for (const auto& patternEl : keyPattern) {
auto newFieldRef(stdx::make_unique(patternEl.fieldNameStringData()));
// Empty path
if (newFieldRef->numParts() == 0)
return {};
// Extra "." in path?
if (newFieldRef->dottedField() != patternEl.fieldNameStringData())
return {};
// Empty parts of the path, ".."?
for (size_t i = 0; i < newFieldRef->numParts(); ++i) {
if (newFieldRef->getPart(i).empty())
return {};
}
// Numeric and ascending (1.0), or "hashed" and single field
if (!patternEl.isNumber()) {
if (keyPattern.nFields() != 1 || !isHashedPatternEl(patternEl))
return {};
} else if (patternEl.numberInt() != 1) {
return {};
}
parsedPaths.emplace_back(std::move(newFieldRef));
}
return parsedPaths;
}
bool isShardKeyElement(const BSONElement& element, bool allowRegex) {
if (element.eoo() || element.type() == Array)
return false;
// TODO: Disallow regex all the time
if (!allowRegex && element.type() == RegEx)
return false;
if (element.type() == Object && !element.embeddedObject().storageValidEmbedded().isOK())
return false;
return true;
}
} // namespace
Status ShardKeyPattern::checkShardKeySize(const BSONObj& shardKey) {
if (shardKey.objsize() <= kMaxShardKeySizeBytes)
return Status::OK();
return {ErrorCodes::ShardKeyTooBig,
str::stream() << "shard keys must be less than " << kMaxShardKeySizeBytes
<< " bytes, but key "
<< shardKey
<< " is "
<< shardKey.objsize()
<< " bytes"};
}
ShardKeyPattern::ShardKeyPattern(const BSONObj& keyPattern)
: _keyPatternPaths(parseShardKeyPattern(keyPattern)),
_keyPattern(_keyPatternPaths.empty() ? BSONObj() : keyPattern),
_hasId(keyPattern.hasField("_id"_sd)) {}
ShardKeyPattern::ShardKeyPattern(const KeyPattern& keyPattern)
: ShardKeyPattern(keyPattern.toBSON()) {}
bool ShardKeyPattern::isValid() const {
return !_keyPattern.toBSON().isEmpty();
}
bool ShardKeyPattern::isHashedPattern() const {
return isHashedPatternEl(_keyPattern.toBSON().firstElement());
}
const KeyPattern& ShardKeyPattern::getKeyPattern() const {
return _keyPattern;
}
const std::vector>& ShardKeyPattern::getKeyPatternFields() const {
return _keyPatternPaths;
}
const BSONObj& ShardKeyPattern::toBSON() const {
return _keyPattern.toBSON();
}
string ShardKeyPattern::toString() const {
return toBSON().toString();
}
bool ShardKeyPattern::isShardKey(const BSONObj& shardKey) const {
// Shard keys are always of the form: { 'nested.path' : value, 'nested.path2' : value }
if (!isValid())
return false;
const auto& keyPatternBSON = _keyPattern.toBSON();
for (const auto& patternEl : keyPatternBSON) {
BSONElement keyEl = shardKey[patternEl.fieldNameStringData()];
if (!isShardKeyElement(keyEl, true))
return false;
}
return shardKey.nFields() == keyPatternBSON.nFields();
}
BSONObj ShardKeyPattern::normalizeShardKey(const BSONObj& shardKey) const {
// Shard keys are always of the form: { 'nested.path' : value, 'nested.path2' : value }
// and in the same order as the key pattern
if (!isValid())
return BSONObj();
// We want to return an empty key if users pass us something that's not a shard key
if (shardKey.nFields() > _keyPattern.toBSON().nFields())
return BSONObj();
BSONObjBuilder keyBuilder;
BSONObjIterator patternIt(_keyPattern.toBSON());
while (patternIt.more()) {
BSONElement patternEl = patternIt.next();
BSONElement keyEl = shardKey[patternEl.fieldNameStringData()];
if (!isShardKeyElement(keyEl, true))
return BSONObj();
keyBuilder.appendAs(keyEl, patternEl.fieldName());
}
dassert(isShardKey(keyBuilder.asTempObj()));
return keyBuilder.obj();
}
static BSONElement extractKeyElementFromMatchable(const MatchableDocument& matchable,
StringData pathStr) {
ElementPath path;
path.init(pathStr);
path.setTraverseNonleafArrays(false);
path.setTraverseLeafArray(false);
MatchableDocument::IteratorHolder matchIt(&matchable, &path);
if (!matchIt->more())
return BSONElement();
BSONElement matchEl = matchIt->next().element();
// We shouldn't have more than one element - we don't expand arrays
dassert(!matchIt->more());
return matchEl;
}
BSONObj ShardKeyPattern::extractShardKeyFromMatchable(const MatchableDocument& matchable) const {
if (!isValid())
return BSONObj();
BSONObjBuilder keyBuilder;
BSONObjIterator patternIt(_keyPattern.toBSON());
while (patternIt.more()) {
BSONElement patternEl = patternIt.next();
BSONElement matchEl =
extractKeyElementFromMatchable(matchable, patternEl.fieldNameStringData());
if (!isShardKeyElement(matchEl, true))
return BSONObj();
if (isHashedPatternEl(patternEl)) {
keyBuilder.append(
patternEl.fieldName(),
BSONElementHasher::hash64(matchEl, BSONElementHasher::DEFAULT_HASH_SEED));
} else {
// NOTE: The matched element may *not* have the same field name as the path -
// index keys don't contain field names, for example
keyBuilder.appendAs(matchEl, patternEl.fieldName());
}
}
dassert(isShardKey(keyBuilder.asTempObj()));
return keyBuilder.obj();
}
BSONObj ShardKeyPattern::extractShardKeyFromDoc(const BSONObj& doc) const {
BSONMatchableDocument matchable(doc);
return extractShardKeyFromMatchable(matchable);
}
static BSONElement findEqualityElement(const EqualityMatches& equalities, const FieldRef& path) {
int parentPathPart;
const BSONElement parentEl =
pathsupport::findParentEqualityElement(equalities, path, &parentPathPart);
if (parentPathPart == static_cast(path.numParts()))
return parentEl;
if (parentEl.type() != Object)
return BSONElement();
StringData suffixStr = path.dottedSubstring(parentPathPart, path.numParts());
BSONMatchableDocument matchable(parentEl.Obj());
return extractKeyElementFromMatchable(matchable, suffixStr);
}
StatusWith ShardKeyPattern::extractShardKeyFromQuery(OperationContext* opCtx,
const BSONObj& basicQuery) const {
if (!isValid())
return StatusWith(BSONObj());
auto qr = stdx::make_unique(NamespaceString(""));
qr->setFilter(basicQuery);
const boost::intrusive_ptr expCtx;
auto statusWithCQ =
CanonicalQuery::canonicalize(opCtx,
std::move(qr),
expCtx,
ExtensionsCallbackNoop(),
MatchExpressionParser::kAllowAllSpecialFeatures);
if (!statusWithCQ.isOK()) {
return StatusWith(statusWithCQ.getStatus());
}
unique_ptr query = std::move(statusWithCQ.getValue());
return extractShardKeyFromQuery(*query);
}
BSONObj ShardKeyPattern::extractShardKeyFromQuery(const CanonicalQuery& query) const {
if (!isValid())
return BSONObj();
// Extract equalities from query.
EqualityMatches equalities;
// TODO: Build the path set initially?
FieldRefSet keyPatternPathSet(transitional_tools_do_not_use::unspool_vector(_keyPatternPaths));
// We only care about extracting the full key pattern paths - if they don't exist (or are
// conflicting), we don't contain the shard key.
Status eqStatus =
pathsupport::extractFullEqualityMatches(*query.root(), keyPatternPathSet, &equalities);
// NOTE: Failure to extract equality matches just means we return no shard key - it's not
// an error we propagate
if (!eqStatus.isOK())
return BSONObj();
// Extract key from equalities
// NOTE: The method below is equivalent to constructing a BSONObj and running
// extractShardKeyFromMatchable, but doesn't require creating the doc.
BSONObjBuilder keyBuilder;
// Iterate the parsed paths to avoid re-parsing
for (auto it = _keyPatternPaths.begin(); it != _keyPatternPaths.end(); ++it) {
const FieldRef& patternPath = **it;
BSONElement equalEl = findEqualityElement(equalities, patternPath);
if (!isShardKeyElement(equalEl, false))
return BSONObj();
if (isHashedPattern()) {
keyBuilder.append(
patternPath.dottedField(),
BSONElementHasher::hash64(equalEl, BSONElementHasher::DEFAULT_HASH_SEED));
} else {
// NOTE: The equal element may *not* have the same field name as the path -
// nested $and, $eq, for example
keyBuilder.appendAs(equalEl, patternPath.dottedField());
}
}
dassert(isShardKey(keyBuilder.asTempObj()));
return keyBuilder.obj();
}
bool ShardKeyPattern::isUniqueIndexCompatible(const BSONObj& uniqueIndexPattern) const {
dassert(!KeyPattern::isHashedKeyPattern(uniqueIndexPattern));
if (!uniqueIndexPattern.isEmpty() &&
string("_id") == uniqueIndexPattern.firstElementFieldName()) {
return true;
}
return _keyPattern.toBSON().isFieldNamePrefixOf(uniqueIndexPattern);
}
BoundList ShardKeyPattern::flattenBounds(const IndexBounds& indexBounds) const {
invariant(indexBounds.fields.size() == (size_t)_keyPattern.toBSON().nFields());
// If any field is unsatisfied, return empty bound list.
for (vector::const_iterator it = indexBounds.fields.begin();
it != indexBounds.fields.end();
it++) {
if (it->intervals.size() == 0) {
return BoundList();
}
}
// To construct our bounds we will generate intervals based on bounds for the first field, then
// compound intervals based on constraints for the first 2 fields, then compound intervals for
// the first 3 fields, etc.
//
// As we loop through the fields, we start generating new intervals that will later get extended
// in another iteration of the loop. We define these partially constructed intervals using pairs
// of BSONObjBuilders (shared_ptrs, since after one iteration of the loop they still must exist
// outside their scope).
typedef vector, std::shared_ptr>>
BoundBuilders;
BoundBuilders builders;
builders.emplace_back(shared_ptr(new BSONObjBuilder()),
shared_ptr(new BSONObjBuilder()));
BSONObjIterator keyIter(_keyPattern.toBSON());
// until equalityOnly is false, we are just dealing with equality (no range or $in queries).
bool equalityOnly = true;
for (size_t i = 0; i < indexBounds.fields.size(); i++) {
BSONElement e = keyIter.next();
StringData fieldName = e.fieldNameStringData();
// get the relevant intervals for this field, but we may have to transform the
// list of what's relevant according to the expression for this field
const OrderedIntervalList& oil = indexBounds.fields[i];
const vector& intervals = oil.intervals;
if (equalityOnly) {
if (intervals.size() == 1 && intervals.front().isPoint()) {
// this field is only a single point-interval
BoundBuilders::const_iterator j;
for (j = builders.begin(); j != builders.end(); ++j) {
j->first->appendAs(intervals.front().start, fieldName);
j->second->appendAs(intervals.front().end, fieldName);
}
} else {
// This clause is the first to generate more than a single point.
// We only execute this clause once. After that, we simplify the bound
// extensions to prevent combinatorial explosion.
equalityOnly = false;
BoundBuilders newBuilders;
for (BoundBuilders::const_iterator it = builders.begin(); it != builders.end();
++it) {
BSONObj first = it->first->obj();
BSONObj second = it->second->obj();
for (vector::const_iterator interval = intervals.begin();
interval != intervals.end();
++interval) {
uassert(17439,
"combinatorial limit of $in partitioning of results exceeded",
newBuilders.size() < kMaxFlattenedInCombinations);
newBuilders.emplace_back(shared_ptr(new BSONObjBuilder()),
shared_ptr(new BSONObjBuilder()));
newBuilders.back().first->appendElements(first);
newBuilders.back().second->appendElements(second);
newBuilders.back().first->appendAs(interval->start, fieldName);
newBuilders.back().second->appendAs(interval->end, fieldName);
}
}
builders = newBuilders;
}
} else {
// if we've already generated a range or multiple point-intervals
// just extend what we've generated with min/max bounds for this field
BoundBuilders::const_iterator j;
for (j = builders.begin(); j != builders.end(); ++j) {
j->first->appendAs(intervals.front().start, fieldName);
j->second->appendAs(intervals.back().end, fieldName);
}
}
}
BoundList ret;
for (BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i)
ret.emplace_back(i->first->obj(), i->second->obj());
return ret;
}
} // namespace mongo