summaryrefslogtreecommitdiff
path: root/src/mongo/db/catalog/validate_state.h
blob: fa99e652567e356abffda961c5f4c8e1e3e07bd6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
/**
 *    Copyright (C) 2019-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include "mongo/db/catalog/collection_options.h"
#include "mongo/db/catalog/collection_validation.h"
#include "mongo/db/catalog/throttle_cursor.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/server_options.h"
#include "mongo/db/storage/record_store.h"
#include "mongo/db/storage/storage_parameters_gen.h"
#include "mongo/util/uuid.h"

namespace mongo {
namespace CollectionValidation {

/**
 * Contains information about the collection being validated and the user provided validation
 * options. Additionally it maintains the state of shared objects throughout the validation, such
 * as locking, cursors and data throttling.
 */
class ValidateState {
    // Non-copyable: the class holds lock guards and unique_ptr-owned cursors.
    ValidateState(const ValidateState&) = delete;
    ValidateState& operator=(const ValidateState&) = delete;

public:
    /**
     * 'turnOnExtraLoggingForTest' turns on extra logging for test debugging. This parameter is for
     * unit testing only.
     */
    ValidateState(OperationContext* opCtx,
                  const NamespaceString& nss,
                  ValidateMode mode,
                  RepairMode repairMode,
                  bool turnOnExtraLoggingForTest = false);

    /**
     * Returns the namespace stored in this state.
     */
    const NamespaceString& nss() const {
        return _nss;
    }

    /**
     * Returns true if the validation mode is 'kMetadata'.
     */
    bool isMetadataValidation() const {
        return _mode == ValidateMode::kMetadata;
    }

    /**
     * Returns true if the validation mode is one of the background modes ('kBackground' or
     * 'kBackgroundCheckBSON').
     */
    bool isBackground() const {
        return _mode == ValidateMode::kBackground || _mode == ValidateMode::kBackgroundCheckBSON;
    }

    /**
     * Returns whether fast count should be enforced for this validation. Defined out of line.
     */
    bool shouldEnforceFastCount() const;

    /**
     * Returns true if the validation mode is 'kForegroundFull' or
     * 'kForegroundFullEnforceFastCount'.
     */
    bool isFullValidation() const {
        return _mode == ValidateMode::kForegroundFull ||
            _mode == ValidateMode::kForegroundFullEnforceFastCount;
    }

    /**
     * Returns true for any full validation mode, and additionally for 'kForegroundFullIndexOnly'.
     */
    bool isFullIndexValidation() const {
        return isFullValidation() || _mode == ValidateMode::kForegroundFullIndexOnly;
    }

    /**
     * Returns 'BSONValidateMode::kFull' only when all of the following hold: the feature
     * compatibility version is initialized, the 'gExtendValidateCommand' feature flag is enabled,
     * and the validation mode is 'kForegroundCheckBSON', 'kBackgroundCheckBSON' or a full
     * validation mode. Otherwise returns 'BSONValidateMode::kExtended'.
     */
    BSONValidateMode getBSONValidateMode() const {
        // The FCV initialization check must come first: the feature flag is only consulted once
        // the version is known to be initialized (short-circuit evaluation).
        return serverGlobalParams.featureCompatibility.isVersionInitialized() &&
                feature_flags::gExtendValidateCommand.isEnabled(
                    serverGlobalParams.featureCompatibility) &&
                (_mode == ValidateMode::kForegroundCheckBSON ||
                 _mode == ValidateMode::kBackgroundCheckBSON || isFullValidation())
            ? BSONValidateMode::kFull
            : BSONValidateMode::kExtended;
    }

    /**
     * Sticky flag: once set via setCollectionSchemaViolated() it is never cleared by this class.
     */
    bool isCollectionSchemaViolated() const {
        return _collectionSchemaViolated;
    }

    void setCollectionSchemaViolated() {
        _collectionSchemaViolated = true;
    }

    /**
     * Sticky flag: once set via setTimeseriesDataInconsistent() it is never cleared by this class.
     */
    bool isTimeseriesDataInconsistent() const {
        return _timeseriesDataInconsistency;
    }

    void setTimeseriesDataInconsistent() {
        _timeseriesDataInconsistency = true;
    }

    /**
     * Sticky flag: once set via setBSONDataNonConformant() it is never cleared by this class.
     */
    bool isBSONDataNonConformant() const {
        return _BSONDataNonConformant;
    }

    void setBSONDataNonConformant() {
        _BSONDataNonConformant = true;
    }

    /**
     * Returns true if the repair mode is 'kFixErrors'.
     */
    bool fixErrors() const {
        return _repairMode == RepairMode::kFixErrors;
    }

    /**
     * Returns true if the repair mode is 'kFixErrors' or 'kAdjustMultikey'.
     */
    bool adjustMultikey() const {
        return _repairMode == RepairMode::kFixErrors || _repairMode == RepairMode::kAdjustMultikey;
    }

    /**
     * Returns a copy of the stored UUID. Invariants that the UUID has been set.
     */
    UUID uuid() const {
        invariant(_uuid);
        return *_uuid;
    }

    /**
     * Returns the stored database pointer. Invariants that it is non-null.
     */
    const Database* getDatabase() const {
        invariant(_database);
        return _database;
    }

    /**
     * Returns the stored collection. Invariants that it is set.
     */
    const CollectionPtr& getCollection() const {
        invariant(_collection);
        return _collection;
    }

    /**
     * Returns the indexes that are going to be validated.
     */
    const std::vector<std::shared_ptr<const IndexCatalogEntry>>& getIndexes() const {
        return _indexes;
    }

    /**
     * Returns the set of indexes that will not be validated.
     */
    const StringSet& getSkippedIndexes() const {
        return _skippedIndexes;
    }

    /**
     * Map of index names to index cursors.
     */
    const StringMap<std::unique_ptr<SortedDataInterfaceThrottleCursor>>& getIndexCursors() const {
        return _indexCursors;
    }

    const std::unique_ptr<SeekableRecordThrottleCursor>& getTraverseRecordStoreCursor() const {
        return _traverseRecordStoreCursor;
    }

    const std::unique_ptr<SeekableRecordThrottleCursor>& getSeekRecordStoreCursor() const {
        return _seekRecordStoreCursor;
    }

    const StringMap<std::unique_ptr<ColumnStore::Cursor>>& getColumnStoreCursors() const {
        return _columnStoreIndexCursors;
    }

    RecordId getFirstRecordId() const {
        return _firstRecordId;
    }

    /**
     * Yields locks for background validation; or cursors for foreground validation. Locks are
     * yielded to allow DDL ops to run concurrently with background validation. Cursors are yielded
     * for foreground validation in order to avoid building cache pressure caused by holding a
     * snapshot too long.
     *
     * See _yieldLocks() and _yieldCursors() for details. Throws on interruptions.
     */
    void yield(OperationContext* opCtx);

    /**
     * Initializes all the cursors to be used during validation and moves the traversal record
     * store cursor to the first record.
     */
    void initializeCursors(OperationContext* opCtx);

    /**
     * Indicates whether extra logging should occur during validation.
     *
     * This is for unit testing only. Intended to improve diagnosability.
     */
    bool extraLoggingForTest() const {
        return _extraLoggingForTest;
    }

    /**
     * Returns the stored validation timestamp, or boost::none if none has been set.
     */
    boost::optional<Timestamp> getValidateTimestamp() const {
        return _validateTs;
    }

private:
    ValidateState() = delete;

    /**
     * Re-locks the database and collection with the appropriate locks for background validation.
     * This should only be called when '_mode' is set to 'kBackground'.
     */
    void _relockDatabaseAndCollection(OperationContext* opCtx);

    /**
     * Yields both the database and collection locks temporarily in order to allow concurrent DDL
     * operations to passthrough. After both the database and collection locks have been restored,
     * check if validation can resume. Validation cannot be resumed if the database or collection is
     * dropped. In addition, if any indexes that were being validated are removed, validation will
     * be interrupted. A collection that was renamed across the same database can continue to be
     * validated, but a cross database collection rename will interrupt validation. If the locks
     * cannot be re-acquired, throws the error.
     *
     * Throws an interruption exception if validation cannot continue.
     *
     * After locks are reacquired:
     *     - Check if the database exists.
     *     - Check if the collection exists.
     *     - Check if any indexes that were being validated have been removed.
     */
    void _yieldLocks(OperationContext* opCtx);

    /**
     * Saves and restores the open cursors to release snapshots and minimize cache pressure for
     * validation.
     */
    void _yieldCursors(OperationContext* opCtx);

    /**
     * Defined out of line.
     * NOTE(review): presumably reports whether the given index's data is included in a storage
     * checkpoint -- confirm against the implementation.
     */
    bool _isIndexDataCheckpointed(OperationContext* opCtx, const IndexCatalogEntry* entry);

    NamespaceString _nss;
    ValidateMode _mode;
    RepairMode _repairMode;
    bool _collectionSchemaViolated = false;
    bool _timeseriesDataInconsistency = false;
    bool _BSONDataNonConformant = false;

    boost::optional<ShouldNotConflictWithSecondaryBatchApplicationBlock> _noPBWM;
    boost::optional<Lock::GlobalLock> _globalLock;
    boost::optional<AutoGetDb> _databaseLock;
    boost::optional<CollectionNamespaceOrUUIDLock> _collectionLock;

    // Defaulted so the pointer is never indeterminate; the constructor (defined out of line) is
    // expected to assign the real value.
    Database* _database = nullptr;
    CollectionPtr _collection;

    // Always present after construction, but needs to be boost::optional due to the lack of default
    // constructor
    boost::optional<UUID> _uuid;

    // Stores the indexes that are going to be validated. When validate yields periodically we'll
    // use this list to determine if validation should abort when an existing index that was
    // being validated is dropped. Additionally we'll use this list to determine which indexes to
    // skip during validation that may have been created in-between yields.
    std::vector<std::shared_ptr<const IndexCatalogEntry>> _indexes;

    // Shared cursors to be used during validation, created in 'initializeCursors()'.
    StringMap<std::unique_ptr<SortedDataInterfaceThrottleCursor>> _indexCursors;
    std::unique_ptr<SeekableRecordThrottleCursor> _traverseRecordStoreCursor;
    std::unique_ptr<SeekableRecordThrottleCursor> _seekRecordStoreCursor;
    StringMap<std::unique_ptr<ColumnStore::Cursor>> _columnStoreIndexCursors;

    // Stores the set of indexes that will not be validated for some reason, e.g. they are not
    // ready.
    StringSet _skippedIndexes;

    RecordId _firstRecordId;

    DataThrottle _dataThrottle;

    // Used to detect when the catalog is re-opened while yielding locks. Defaulted so the value is
    // never indeterminate before the constructor assigns it.
    uint64_t _catalogGeneration = 0;

    // Can be set by unit tests to obtain better insight into what validate sees/does. Defaults to
    // off, matching the constructor's default argument.
    bool _extraLoggingForTest = false;

    boost::optional<Timestamp> _validateTs = boost::none;
};

}  // namespace CollectionValidation
}  // namespace mongo