summaryrefslogtreecommitdiff
path: root/src/mongo/db/s/config/initial_split_policy.h
blob: 6a2ca2d2a310fffbe800bd85d33a0c971ad978b6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

/**
 *    Copyright (C) 2018-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include <vector>

#include "mongo/bson/bsonobj.h"
#include "mongo/db/namespace_string.h"
#include "mongo/s/catalog/type_chunk.h"
#include "mongo/s/catalog/type_tags.h"
#include "mongo/s/shard_id.h"
#include "mongo/s/shard_key_pattern.h"

namespace mongo {

/**
 * Groups the static helpers used when a collection is first sharded: computing split points for
 * hashed shard keys, generating the initial set of chunks (optionally based on zones/tags), and
 * writing those chunks to the config server.
 */
class InitialSplitPolicy {
public:
    /**
     * For new collections which use hashed shard keys, we can pre-split the range of possible
     * hashes into a large number of chunks, and distribute them evenly at creation time.
     *
     * Until we design a better initialization scheme, the most performant way to pre-split is to
     * make one big chunk for each shard and migrate them one at a time. Because of this:
     * - 'initialSplitPoints' is populated with the split points to use on the primary shard to
     * produce the initial "big chunks."
     * - 'finalSplitPoints' is populated with the additional split points to use on the "big chunks"
     * after the "big chunks" have been spread evenly across shards through migrations.
     *
     * Both 'initialSplitPoints' and 'finalSplitPoints' are output parameters.
     *
     * NOTE(review): how 'isEmpty', 'numShards' and 'numInitialChunks' influence the computed split
     * points is decided by the definition, which is not visible from this header. Presumably
     * 'isEmpty' states whether the collection being sharded currently holds no documents - confirm
     * against the implementation.
     */
    static void calculateHashedSplitPointsForEmptyCollection(
        const ShardKeyPattern& shardKeyPattern,
        bool isEmpty,
        int numShards,
        int numInitialChunks,
        std::vector<BSONObj>* initialSplitPoints,
        std::vector<BSONObj>* finalSplitPoints);

    /**
     * Holds the chunks produced for a collection which is being newly-sharded. It is the return
     * type of the chunk-generation functions below and the input of writeFirstChunksToConfig().
     */
    struct ShardCollectionConfig {
        // The initial chunks of the collection.
        std::vector<ChunkType> chunks;

        /**
         * Returns the version of the last element of 'chunks'.
         *
         * Must only be called when 'chunks' is non-empty: calling back() on an empty std::vector
         * is undefined behavior and no check is performed here.
         *
         * NOTE(review): this presumably relies on the last chunk carrying the highest version so
         * that it can stand for the collection version - confirm against the chunk generators.
         */
        const auto& collVersion() const {
            return chunks.back().getVersion();
        }
    };

    /**
     * Produces the initial chunks that need to be written for a collection which is being
     * newly-sharded. The function performs some basic validation of the input parameters, but there
     * is no checking whether the collection contains any data or not.
     *
     * Chunks are assigned to a shard in a round-robin fashion, numContiguousChunksPerShard (k)
     * chunks at a time. For example, the first k chunks are assigned to the first available shard,
     * and the next k chunks are assigned to the second available shard and so on.
     *
     * numContiguousChunksPerShard should only be > 1 when we do not pre-split the range into
     * larger chunks and then split the resulting chunks on the destination shards as in
     * configSvrShardCollection. In that case it should be equal to
     * (number of final split points + 1) divided by (number of initial split points + 1). It serves
     * to preserve the ordering/contiguousness of chunks when split by shardSvrShardCollection so
     * that it yields the exact same shard assignments as configSvrShardCollection.
     */
    static ShardCollectionConfig generateShardCollectionInitialChunks(
        const NamespaceString& nss,
        const ShardKeyPattern& shardKeyPattern,
        const ShardId& databasePrimaryShardId,
        const Timestamp& validAfter,
        const std::vector<BSONObj>& splitPoints,
        const std::vector<ShardId>& allShardIds,
        const int numContiguousChunksPerShard = 1);

    /**
     * Produces the initial chunks that need to be written for a collection which is being
     * newly-sharded based on the given tags. Chunks that do not correspond to any pre-defined
     * zones are assigned to available shards in a round-robin fashion.
     *
     * 'tagToShards' maps a tag (zone) name to the list of shards associated with that tag.
     */
    static ShardCollectionConfig generateShardCollectionInitialZonedChunks(
        const NamespaceString& nss,
        const ShardKeyPattern& shardKeyPattern,
        const Timestamp& validAfter,
        const std::vector<TagsType>& tags,
        const StringMap<std::vector<ShardId>>& tagToShards,
        const std::vector<ShardId>& allShardIds);

    /**
     * Creates the first chunks for a newly sharded collection.
     * Returns the created chunks.
     *
     * NOTE(review): the exact interplay between 'splitPoints', 'tags', 'distributeInitialChunks'
     * and 'numContiguousChunksPerShard' is decided by the definition, which is not visible from
     * this header. Presumably this dispatches to the two generate* functions above - confirm
     * against the implementation.
     */
    static ShardCollectionConfig createFirstChunks(OperationContext* opCtx,
                                                   const NamespaceString& nss,
                                                   const ShardKeyPattern& shardKeyPattern,
                                                   const ShardId& primaryShardId,
                                                   const std::vector<BSONObj>& splitPoints,
                                                   const std::vector<TagsType>& tags,
                                                   const bool distributeInitialChunks,
                                                   const int numContiguousChunksPerShard = 1);

    /**
     * Writes to the config server the first chunks for a newly sharded collection.
     *
     * 'initialChunks' is the set of chunks to write, as returned by one of the functions above.
     */
    static void writeFirstChunksToConfig(
        OperationContext* opCtx, const InitialSplitPolicy::ShardCollectionConfig& initialChunks);
};
}  // namespace mongo