/**
 *    Copyright (C) 2013 10gen Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU Affero General Public License, version 3,
 *    as published by the Free Software Foundation.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *    GNU Affero General Public License for more details.
 *
 *    You should have received a copy of the GNU Affero General Public License
 *    along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the GNU Affero General Public License in all respects
 *    for all of the code used other than as permitted herein. If you modify
 *    file(s) with this exception, you may extend this exception to your
 *    version of the file(s), but you are not obligated to do so. If you do not
 *    wish to do so, delete this exception statement from your version. If you
 *    delete this exception statement from all source files in the program,
 *    then also delete it in the license file.
 */

#include "mongo/s/shard_key_pattern.h"

#include <boost/scoped_ptr.hpp>
#include <boost/shared_ptr.hpp>
#include <vector>

#include "mongo/db/field_ref.h"
#include "mongo/db/field_ref_set.h"
#include "mongo/db/hasher.h"
#include "mongo/db/index_names.h"
#include "mongo/db/ops/path_support.h"
#include "mongo/db/query/canonical_query.h"
#include "mongo/util/mongoutils/str.h"

namespace mongo {

    using boost::scoped_ptr;
    using boost::shared_ptr;
    using std::auto_ptr;
    using std::pair;
    using std::make_pair;
    using std::vector;
    using std::string;

    using pathsupport::EqualityMatches;
    using mongoutils::str::stream;

    const int ShardKeyPattern::kMaxShardKeySizeBytes = 512;
    const unsigned int ShardKeyPattern::kMaxFlattenedInCombinations = 4000000;

    Status ShardKeyPattern::checkShardKeySize(const BSONObj& shardKey) {
        if (shardKey.objsize() <= kMaxShardKeySizeBytes)
            return Status::OK();

        return Status(ErrorCodes::ShardKeyTooBig,
                      stream() << "shard keys must be less than " << kMaxShardKeySizeBytes
                               << " bytes, but key " << shardKey << " is " << shardKey.objsize()
                               << " bytes");
    }

    static bool isHashedPatternEl(const BSONElement& el) {
        return el.type() == String && el.String() == IndexNames::HASHED;
    }

    /**
     * Currently the allowable shard keys are either
     * i) a hashed single field, e.g. { a : "hashed" }, or
     * ii) a compound list of ascending, potentially-nested field paths, e.g. { a : 1 , b.c : 1 }
     */
    static vector<FieldRef*> parseShardKeyPattern(const BSONObj& keyPattern) {

        OwnedPointerVector<FieldRef> parsedPaths;
        static const vector<FieldRef*> empty;

        BSONObjIterator patternIt(keyPattern);
        while (patternIt.more()) {

            BSONElement patternEl = patternIt.next();
            parsedPaths.push_back(new FieldRef(patternEl.fieldNameStringData()));
            const FieldRef& patternPath = *parsedPaths.back();

            // Empty path
            if (patternPath.numParts() == 0)
                return empty;

            // Extra "." in path?
            if (patternPath.dottedField() != patternEl.fieldNameStringData())
                return empty;

            // Empty parts of the path, ".."?
            for (size_t i = 0; i < patternPath.numParts(); ++i) {
                if (patternPath.getPart(i).size() == 0)
                    return empty;
            }

            // Numeric and ascending (1.0), or "hashed" and single field
            if (!patternEl.isNumber()) {
                if (keyPattern.nFields() != 1 || !isHashedPatternEl(patternEl))
                    return empty;
            } else if (patternEl.numberInt() != 1) {
                return empty;
            }
        }

        return parsedPaths.release();
    }
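    // Illustrative consequences of the checks above (derived from parseShardKeyPattern, not
    // additional behavior):
    //   { a : 1 }, { a : 1, "b.c" : 1 }      -> parsed successfully
    //   { a : "hashed" }                     -> parsed successfully (single hashed field)
    //   { a : -1 }, { a : 1, b : "hashed" }  -> rejected (descending, or hashed in a compound key)
    //   { "" : 1 }, { "a..b" : 1 }           -> rejected (empty path or empty path part)
    // A rejected pattern yields an empty path vector, so the constructors below produce an
    // invalid (empty) ShardKeyPattern.
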
    ShardKeyPattern::ShardKeyPattern(const BSONObj& keyPattern)
        : _keyPatternPaths(parseShardKeyPattern(keyPattern)),
          _keyPattern(_keyPatternPaths.empty() ? BSONObj() : keyPattern) {
    }

    ShardKeyPattern::ShardKeyPattern(const KeyPattern& keyPattern)
        : _keyPatternPaths(parseShardKeyPattern(keyPattern.toBSON())),
          _keyPattern(_keyPatternPaths.empty() ? KeyPattern(BSONObj()) : keyPattern) {
    }

    bool ShardKeyPattern::isValid() const {
        return !_keyPattern.toBSON().isEmpty();
    }

    bool ShardKeyPattern::isHashedPattern() const {
        return isHashedPatternEl(_keyPattern.toBSON().firstElement());
    }

    const KeyPattern& ShardKeyPattern::getKeyPattern() const {
        return _keyPattern;
    }

    const BSONObj& ShardKeyPattern::toBSON() const {
        return _keyPattern.toBSON();
    }

    string ShardKeyPattern::toString() const {
        return toBSON().toString();
    }

    static bool isShardKeyElement(const BSONElement& element, bool allowRegex) {
        // TODO: Disallow regex all the time
        if (element.eoo() || element.type() == Array ||
            (!allowRegex && element.type() == RegEx) ||
            (element.type() == Object && !element.embeddedObject().okForStorage()))
            return false;
        return true;
    }

    bool ShardKeyPattern::isShardKey(const BSONObj& shardKey) const {

        // Shard keys are always of the form: { 'nested.path' : value, 'nested.path2' : value }

        if (!isValid())
            return false;

        BSONObjIterator patternIt(_keyPattern.toBSON());
        while (patternIt.more()) {

            BSONElement patternEl = patternIt.next();

            BSONElement keyEl = shardKey[patternEl.fieldNameStringData()];
            if (!isShardKeyElement(keyEl, true))
                return false;
        }

        return true;
    }

    BSONObj ShardKeyPattern::normalizeShardKey(const BSONObj& shardKey) const {

        // Shard keys are always of the form: { 'nested.path' : value, 'nested.path2' : value },
        // and in the same order as the key pattern

        if (!isValid())
            return BSONObj();

        // We want to return an empty key if users pass us something that's not a shard key
        if (shardKey.nFields() > _keyPattern.toBSON().nFields())
            return BSONObj();

        BSONObjBuilder keyBuilder;
        BSONObjIterator patternIt(_keyPattern.toBSON());
        while (patternIt.more()) {

            BSONElement patternEl = patternIt.next();

            BSONElement keyEl = shardKey[patternEl.fieldNameStringData()];
            if (!isShardKeyElement(keyEl, true))
                return BSONObj();

            keyBuilder.appendAs(keyEl, patternEl.fieldName());
        }

        dassert(isShardKey(keyBuilder.asTempObj()));
        return keyBuilder.obj();
    }

    static BSONElement extractKeyElementFromMatchable(const MatchableDocument& matchable,
                                                      StringData pathStr) {
        ElementPath path;
        path.init(pathStr);
        path.setTraverseNonleafArrays(false);
        path.setTraverseLeafArray(false);

        MatchableDocument::IteratorHolder matchIt(&matchable, &path);
        if (!matchIt->more())
            return BSONElement();

        BSONElement matchEl = matchIt->next().element();
        // We shouldn't have more than one element - we don't expand arrays
        dassert(!matchIt->more());

        return matchEl;
    }

    BSONObj ShardKeyPattern::extractShardKeyFromMatchable(
        const MatchableDocument& matchable) const {

        if (!isValid())
            return BSONObj();

        BSONObjBuilder keyBuilder;

        BSONObjIterator patternIt(_keyPattern.toBSON());
        while (patternIt.more()) {

            BSONElement patternEl = patternIt.next();
            BSONElement matchEl =
                extractKeyElementFromMatchable(matchable, patternEl.fieldNameStringData());

            if (!isShardKeyElement(matchEl, true))
                return BSONObj();

            if (isHashedPatternEl(patternEl)) {
                keyBuilder.append(patternEl.fieldName(),
                                  BSONElementHasher::hash64(matchEl,
                                                            BSONElementHasher::DEFAULT_HASH_SEED));
            } else {
                // NOTE: The matched element may *not* have the same field name as the path -
                // index keys don't contain field names, for example
                keyBuilder.appendAs(matchEl, patternEl.fieldName());
            }
        }

        dassert(isShardKey(keyBuilder.asTempObj()));
        return keyBuilder.obj();
    }

    BSONObj ShardKeyPattern::extractShardKeyFromDoc(const BSONObj& doc) const {
        BSONMatchableDocument matchable(doc);
        return extractShardKeyFromMatchable(matchable);
    }
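    // For example, with the pattern { a : 1, "b.c" : 1 }, extractShardKeyFromDoc applied to
    // { a : 10, b : { c : 20 }, d : 1 } yields { a : 10, "b.c" : 20 }. If any pattern path is
    // missing from the document or resolves to an array, the result is an empty BSONObj. With a
    // hashed pattern such as { a : "hashed" }, the extracted value is the 64-bit hash of the
    // document's "a" value rather than the raw value.
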
    static BSONElement findEqualityElement(const EqualityMatches& equalities,
                                           const FieldRef& path) {

        int parentPathPart;
        const BSONElement& parentEl =
            pathsupport::findParentEqualityElement(equalities, path, &parentPathPart);

        if (parentPathPart == static_cast<int>(path.numParts()))
            return parentEl;

        if (parentEl.type() != Object)
            return BSONElement();

        StringData suffixStr = path.dottedSubstring(parentPathPart, path.numParts());
        BSONMatchableDocument matchable(parentEl.Obj());
        return extractKeyElementFromMatchable(matchable, suffixStr);
    }

    StatusWith<BSONObj> ShardKeyPattern::extractShardKeyFromQuery(
        const BSONObj& basicQuery) const {

        if (!isValid())
            return StatusWith<BSONObj>(BSONObj());

        // Extract equalities from query
        CanonicalQuery* rawQuery;
        Status queryStatus =
            CanonicalQuery::canonicalize("", basicQuery, &rawQuery, WhereCallbackNoop());
        if (!queryStatus.isOK())
            return StatusWith<BSONObj>(queryStatus);
        scoped_ptr<CanonicalQuery> query(rawQuery);

        EqualityMatches equalities;
        // TODO: Build the path set initially?
        FieldRefSet keyPatternPathSet(_keyPatternPaths.vector());
        // We only care about extracting the full key pattern paths - if they don't exist (or are
        // conflicting), we don't contain the shard key.
        Status eqStatus = pathsupport::extractFullEqualityMatches(*query->root(),
                                                                  keyPatternPathSet,
                                                                  &equalities);
        // NOTE: Failure to extract equality matches just means we return no shard key - it's not
        // an error we propagate
        if (!eqStatus.isOK())
            return StatusWith<BSONObj>(BSONObj());

        // Extract key from equalities
        // NOTE: The method below is equivalent to constructing a BSONObj and running
        // extractShardKeyFromMatchable, but doesn't require creating the doc.

        BSONObjBuilder keyBuilder;

        // Iterate the parsed paths to avoid re-parsing
        for (OwnedPointerVector<FieldRef>::const_iterator it = _keyPatternPaths.begin();
             it != _keyPatternPaths.end(); ++it) {

            const FieldRef& patternPath = **it;
            BSONElement equalEl = findEqualityElement(equalities, patternPath);

            if (!isShardKeyElement(equalEl, false))
                return StatusWith<BSONObj>(BSONObj());

            if (isHashedPattern()) {
                keyBuilder.append(patternPath.dottedField(),
                                  BSONElementHasher::hash64(equalEl,
                                                            BSONElementHasher::DEFAULT_HASH_SEED));
            } else {
                // NOTE: The equal element may *not* have the same field name as the path -
                // nested $and, $eq, for example
                keyBuilder.appendAs(equalEl, patternPath.dottedField());
            }
        }

        dassert(isShardKey(keyBuilder.asTempObj()));
        return StatusWith<BSONObj>(keyBuilder.obj());
    }
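    // For example, with the pattern { a : 1, "b.c" : 1 }, the query
    // { a : 10, "b.c" : { $eq : 20 }, d : { $gt : 5 } } yields the shard key
    // { a : 10, "b.c" : 20 }, while { a : 10, "b.c" : { $gt : 20 } } yields an empty BSONObj
    // because no full equality is available for "b.c". Regex values are also rejected here,
    // since isShardKeyElement is called with allowRegex = false.
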
    bool ShardKeyPattern::isUniqueIndexCompatible(const BSONObj& uniqueIndexPattern) const {

        dassert(!KeyPattern::isHashedKeyPattern(uniqueIndexPattern));

        if (!uniqueIndexPattern.isEmpty()
            && string("_id") == uniqueIndexPattern.firstElementFieldName()) {
            return true;
        }

        return _keyPattern.toBSON().isFieldNamePrefixOf(uniqueIndexPattern);
    }
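    // Sketch of how flattenBounds behaves, assuming the shard key pattern { a : 1, b : 1 }:
    // for index bounds equivalent to { a : { $in : [ 1, 2 ] }, b : { $gte : 3, $lt : 10 } },
    // the two point intervals on "a" split the initial builder pair in two, and the range on
    // "b" then extends each, yielding the bound list
    // [ ({ a : 1, b : 3 }, { a : 1, b : 10 }), ({ a : 2, b : 3 }, { a : 2, b : 10 }) ].
    // Only the first field whose bounds are not a single point triggers this splitting (capped
    // at kMaxFlattenedInCombinations); every later field is collapsed to its overall min/max.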
    BoundList ShardKeyPattern::flattenBounds(const IndexBounds& indexBounds) const {

        invariant(indexBounds.fields.size() == (size_t)_keyPattern.toBSON().nFields());

        // If any field is unsatisfied, return an empty bound list.
        for (vector<OrderedIntervalList>::const_iterator it = indexBounds.fields.begin();
             it != indexBounds.fields.end(); it++) {
            if (it->intervals.size() == 0) {
                return BoundList();
            }
        }

        // To construct our bounds we will generate intervals based on bounds for
        // the first field, then compound intervals based on constraints for the first
        // 2 fields, then compound intervals for the first 3 fields, etc.
        //
        // As we loop through the fields, we start generating new intervals that will later
        // get extended in another iteration of the loop. We define these partially constructed
        // intervals using pairs of BSONObjBuilders (shared_ptrs, since after one iteration of
        // the loop they still must exist outside their scope).
        typedef vector<pair<shared_ptr<BSONObjBuilder>,
                            shared_ptr<BSONObjBuilder> > > BoundBuilders;

        BoundBuilders builders;
        builders.push_back(make_pair(shared_ptr<BSONObjBuilder>(new BSONObjBuilder()),
                                     shared_ptr<BSONObjBuilder>(new BSONObjBuilder())));
        BSONObjIterator keyIter(_keyPattern.toBSON());
        // Until equalityOnly is false, we are just dealing with equality (no range or $in
        // queries).
        bool equalityOnly = true;

        for (size_t i = 0; i < indexBounds.fields.size(); i++) {

            BSONElement e = keyIter.next();
            StringData fieldName = e.fieldNameStringData();

            // Get the relevant intervals for this field, but we may have to transform the
            // list of what's relevant according to the expression for this field
            const OrderedIntervalList& oil = indexBounds.fields[i];
            const vector<Interval>& intervals = oil.intervals;

            if (equalityOnly) {
                if (intervals.size() == 1 && intervals.front().isPoint()) {
                    // This field is only a single point-interval
                    BoundBuilders::const_iterator j;
                    for (j = builders.begin(); j != builders.end(); ++j) {
                        j->first->appendAs(intervals.front().start, fieldName);
                        j->second->appendAs(intervals.front().end, fieldName);
                    }
                } else {
                    // This clause is the first to generate more than a single point.
                    // We only execute this clause once. After that, we simplify the bound
                    // extensions to prevent combinatorial explosion.
                    equalityOnly = false;

                    BoundBuilders newBuilders;

                    for (BoundBuilders::const_iterator it = builders.begin();
                         it != builders.end(); ++it) {

                        BSONObj first = it->first->obj();
                        BSONObj second = it->second->obj();

                        for (vector<Interval>::const_iterator interval = intervals.begin();
                             interval != intervals.end(); ++interval) {

                            uassert(17439,
                                    "combinatorial limit of $in partitioning of results exceeded",
                                    newBuilders.size() < kMaxFlattenedInCombinations);

                            newBuilders.push_back(
                                make_pair(shared_ptr<BSONObjBuilder>(new BSONObjBuilder()),
                                          shared_ptr<BSONObjBuilder>(new BSONObjBuilder())));

                            newBuilders.back().first->appendElements(first);
                            newBuilders.back().second->appendElements(second);
                            newBuilders.back().first->appendAs(interval->start, fieldName);
                            newBuilders.back().second->appendAs(interval->end, fieldName);
                        }
                    }

                    builders = newBuilders;
                }
            } else {
                // If we've already generated a range or multiple point-intervals,
                // just extend what we've generated with min/max bounds for this field
                BoundBuilders::const_iterator j;
                for (j = builders.begin(); j != builders.end(); ++j) {
                    j->first->appendAs(intervals.front().start, fieldName);
                    j->second->appendAs(intervals.back().end, fieldName);
                }
            }
        }

        BoundList ret;
        for (BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i)
            ret.push_back(make_pair(i->first->obj(), i->second->obj()));

        return ret;
    }

} // namespace mongo