/**
 *    Copyright (C) 2018-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "mongo/base/status.h"
#include "mongo/base/status_with.h"
#include "mongo/db/exec/filter.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/keypattern.h"
#include "mongo/db/query/index_bounds.h"
#include "mongo/db/repl/oplog_entry.h"

namespace mongo {

class CanonicalQuery;
class FieldRef;
class OperationContext;

/**
 * Helper struct when generating flattened bounds below
 *
 * A BoundList contains intervals specified by inclusive start
 * and end bounds. The intervals should be nonoverlapping and occur in
 * the specified direction of traversal.
For example, given a simple index {i:1} * and direction +1, one valid BoundList is: (1, 2); (4, 6). The same BoundList * would be valid for index {i:-1} with direction -1. */ typedef std::vector> BoundList; /** * A ShardKeyPattern represents the key pattern used to partition data in a collection between * shards. Shard keys are extracted from documents, simple queries, or Matchable objects based * on the paths within the key pattern. * * Shard key pattern paths may be nested, but are not traversable through arrays - this means * a shard key pattern path always yields a single value. */ class ShardKeyPattern { public: /** * A struct to represent the index key data. The 'data' field represents the actual key data and * the 'pattern' represents the index key pattern. For an index pattern {a: 1, b: 'hashed'} the * key data would look like {"": "value", "": NumberLong(12345)}. */ struct IndexKeyData { BSONObj data; BSONObj pattern; }; /** * Validates whether the specified shard key is valid to be written as part of the sharding * metadata. */ static Status checkShardKeyIsValidForMetadataStorage(const BSONObj& shardKey); /** * Constructs a shard key pattern from a BSON pattern document. If the document is not a * valid shard key pattern, !isValid() will be true and key extraction will fail. */ explicit ShardKeyPattern(const BSONObj& keyPattern); /** * Constructs a shard key pattern from a key pattern, see above. */ explicit ShardKeyPattern(const KeyPattern& keyPattern); /** * Returns whether the provided element is hashed. */ static bool isHashedPatternEl(const BSONElement& el); /** * Returns the BSONElement pointing to the hashed field. Returns empty BSONElement if not found. */ static BSONElement extractHashedField(BSONObj keyPattern); /** * Check if the given BSONElement is of type 'MinKey', 'MaxKey' or 'NumberLong', which are the * only acceptable values for hashed fields. 
*/ static bool isValidHashedValue(const BSONElement& el); bool isHashedPattern() const; bool hasHashedPrefix() const; BSONElement getHashedField() const; const KeyPattern& getKeyPattern() const; const std::vector>& getKeyPatternFields() const; const BSONObj& toBSON() const; std::string toString() const; /** * Converts the passed in key pattern into a KeyString. * Note: this function strips the field names when creating the KeyString. */ static std::string toKeyString(const BSONObj& shardKey); /** * Returns true if the provided document is a shard key - i.e. has the same fields as the * shard key pattern and valid shard key values. */ bool isShardKey(const BSONObj& shardKey) const; /** * Returns true if the new shard key pattern extends this shard key pattern - i.e. contains this * shard key pattern as a prefix (begins with the same field names in the same order). */ bool isExtendedBy(const ShardKeyPattern& newShardKeyPattern) const; /** * Given a shard key, return it in normal form where the fields are in the same order as * the shard key pattern fields. * * If the shard key is invalid, returns BSONObj() */ BSONObj normalizeShardKey(const BSONObj& shardKey) const; /** * Given one or more index keys, potentially from more than one index, extracts the shard key * corresponding to the shard key pattern. * * All the shard key fields must be present in at least one of the index keys. A missing shard * key field will result in an invariant. */ BSONObj extractShardKeyFromIndexKeyData(const std::vector& indexKeyData) const; /** * Given a document key expressed in dotted notation, extracts its shard key, applying hashing * if necessary. 
* Note: For a shardKeyPattern {a.b: 1, c: 1} * The documentKey for the document {a: {b: 10}, c: 20} is {a.b: 10, c: 20} * The documentKey for the document {a: {b: 10, d: 20}, c: 30} is {a.b: 10, c: 30} * The documentKey for the document {a: {b: {d: 10}}, c: 30} is {a.b: {d: 10}, c: 30} * * Examples: * If 'this' KeyPattern is {a: 1} * {a: 10, b: 20} --> returns {a: 10} * {b: 20} --> returns {a: null} * {a: {b: 10}} --> returns {a: {b: 10}} * {a: [1,2]} --> returns {} * If 'this' KeyPattern is {a.b: 1, c: 1} * {a.b: 10, c: 20} --> returns {a.b: 10, c: 20} * {a.b: 10} --> returns {a.b: 10, c: null} * {a.b: {z: 10}, c: 20} --> returns {a.b: {z: 10}, c: 20} * If 'this' KeyPattern is {a : "hashed"} * {a: 10, b: 20} --> returns {a: NumberLong("7766103514953448109")} * {b: 20} --> returns {a: NumberLong("2338878944348059895")} */ BSONObj extractShardKeyFromDocumentKey(const BSONObj& documentKey) const; BSONObj extractShardKeyFromDocumentKeyThrows(const BSONObj& documentKey) const; /** * Given a document, extracts the shard key corresponding to the key pattern. Paths to shard key * fields must not contain arrays at any level, and shard keys may not be array fields or * non-storable sub-documents. If the shard key pattern is a hashed key pattern, this method * performs the hashing. * * If any shard key fields are missing from the document, the extraction will treat these * fields as null. * * If a shard key cannot be extracted, returns an empty BSONObj(). 
* * Examples: * If 'this' KeyPattern is { a : 1 } * { a: "hi" , b : 4} --> returns { a : "hi" } * { c : 4 , a : 2 } --> returns { a : 2 } * { b : 2 } -> returns {} * { a : [1,2] } -> returns {} * If 'this' KeyPattern is { a : "hashed" } * { a: 1 } --> returns { a : NumberLong("5902408780260971510") } * If 'this' KeyPattern is { 'a.b' : 1 } * { a : { b : "hi" } } --> returns { 'a.b' : "hi" } * { a : [{ b : "hi" }] } --> returns {} * If 'this' KeyPattern is { a: 1 , b: 1 } * { a: 1 } --> returns { a: 1, b: null } * { b: 1 } --> returns { a: null, b: 1 } */ BSONObj extractShardKeyFromDoc(const BSONObj& doc) const; BSONObj extractShardKeyFromDocThrows(const BSONObj& doc) const; /** * Given an Oplog entry, extracts the shard key corresponding to the key pattern for insert, * update, and delete op types. If the op type is not a CRUD operation, an empty BSONObj() * will be returned. * * For update and delete operations, the Oplog entry will contain an object with the document * key. * * For insert operations, the Oplog entry will contain the original document from which the * document key must be extracted * * Examples: * For KeyPattern {'a.b': 1} * If the oplog entries contains field op='i' * oplog contains: { a : { b : "1" } } * If the oplog entries contains field op='u' or op='d' * oplog contains: { 'a.b': "1" } */ BSONObj extractShardKeyFromOplogEntry(const repl::OplogEntry& entry) const; /** * Returns the document with missing shard key values set to null. */ BSONObj emplaceMissingShardKeyValuesForDocument(BSONObj doc) const; /** * Given a simple BSON query, extracts the shard key corresponding to the key pattern * from equality matches in the query. The query expression *must not* be a complex query * with sorts or other attributes. * * Logically, the equalities in the BSON query can be serialized into a BSON document and * then a shard key is extracted from this equality document. * * NOTE: BSON queries and BSON documents look similar but are different languages. 
Use the * correct shard key extraction function. * * Returns !OK status if the query cannot be parsed. Returns an empty BSONObj() if there is * no shard key found in the query equalities. * * Examples: * If the key pattern is { a : 1 } * { a : "hi", b : 4 } --> returns { a : "hi" } * { a : { $eq : "hi" }, b : 4 } --> returns { a : "hi" } * { $and : [{a : { $eq : "hi" }}, { b : 4 }] } --> returns { a : "hi" } * If the key pattern is { 'a.b' : 1 } * { a : { b : "hi" } } --> returns { 'a.b' : "hi" } * { 'a.b' : "hi" } --> returns { 'a.b' : "hi" } * { a : { b : { $eq : "hi" } } } --> returns {} because the query language treats this as * a : { $eq : { b : ... } } */ StatusWith extractShardKeyFromQuery(OperationContext* opCtx, const NamespaceString& nss, const BSONObj& basicQuery) const; // Used to parse queries that contain let parameters and runtime constants. StatusWith extractShardKeyFromQuery(boost::intrusive_ptr expCtx, const BSONObj& basicQuery) const; BSONObj extractShardKeyFromQuery(const CanonicalQuery& query) const; /** * Returns true if the shard key pattern can ensure that the unique index pattern is * respected across all shards. * * Primarily this just checks whether the shard key pattern field names are equal to or a * prefix of the unique index pattern field names. Since documents with the same fields in * the shard key pattern are guaranteed to go to the same shard, and all documents must * contain the full shard key, a unique index with a shard key pattern prefix can be sure * when resolving duplicates that documents on other shards will have different shard keys, * and so are not duplicates. * * Hashed shard key patterns are similar to ordinary patterns in that they guarantee similar * shard keys go to the same shard. 
* * Examples: * shard key {a : 1} is compatible with a unique index on {_id : 1} * shard key {a : 1} is compatible with a unique index on {a : 1 , b : 1} * shard key {a : 1} is compatible with a unique index on {a : -1 , b : 1 } * shard key {a : "hashed"} is compatible with a unique index on {a : 1} * shard key {a : 1} is not compatible with a unique index on {b : 1} * shard key {a : "hashed" , b : 1 } is not compatible with unique index on { b : 1 } * * All unique index patterns starting with _id are assumed to be enforceable by the fact * that _ids must be unique, and so all unique _id prefixed indexes are compatible with * any shard key pattern. * * NOTE: We assume 'uniqueIndexPattern' is a valid unique index pattern - a pattern like * { k : "hashed" } is not capable of being a unique index and is an invalid argument to * this method. */ bool isUniqueIndexCompatible(const BSONObj& uniqueIndexPattern) const; /** * Return an ordered list of bounds generated using this KeyPattern and the * bounds from the IndexBounds. This function is used in sharding to * determine where to route queries according to the shard key pattern. * * Examples: * * Key { a: 1 }, Bounds a: [0] => { a: 0 } -> { a: 0 } * Key { a: 1 }, Bounds a: [2, 3) => { a: 2 } -> { a: 3 } // bound inclusion ignored. * * The bounds returned by this function may be a superset of those defined * by the constraints. For instance, if this KeyPattern is {a : 1, b: 1} * Bounds: { a : {$in : [1,2]} , b : {$in : [3,4,5]} } * => {a : 1 , b : 3} -> {a : 1 , b : 5}, {a : 2 , b : 3} -> {a : 2 , b : 5} * * If the IndexBounds are not defined for all the fields in this keypattern, which * means some fields are unsatisfied, an empty BoundList could return. * */ BoundList flattenBounds(const IndexBounds& indexBounds) const; /** * Returns true if the key pattern has an "_id" field of any flavor. 
*/ bool hasId() const { return _hasId; }; size_t getApproximateSize() const; private: KeyPattern _keyPattern; // Ordered, parsed paths std::vector> _keyPatternPaths; bool _hasId; BSONElement _hashedField; }; } // namespace mongo