summaryrefslogtreecommitdiff
path: root/src/mongo/db/storage/collection_truncate_markers.h
blob: 05ce25909186c36c198c79aa9acbb8d55fd642b7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
/**
 *    Copyright (C) 2023-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include <boost/optional.hpp>

#include "mongo/db/record_id.h"
#include "mongo/db/storage/record_store.h"
#include "mongo/platform/atomic_word.h"
#include "mongo/platform/mutex.h"
#include "mongo/stdx/condition_variable.h"

namespace mongo {

class OperationContext;


// Keep "markers" against a collection to efficiently remove ranges of old records when the
// collection grows. This class is meant to be used only with collections that have the following
// requirements:
// * The collection is an insert-only collection
// * The collection has no indexes
// * If a record with RecordId=Max has to be deleted then all previous records with RecordId D such
// that Min < D <= Max should be deleted, with RecordId=Min defining the lower boundary.
//
// If these requirements hold then this class can be used to compute and maintain up-to-date markers
// for ranges of deletions. These markers will be expired and returned to the deleter whenever the
// implementation defined '_hasExcessMarkers' returns true.
class CollectionTruncateMarkers {
public:
    /** Markers represent "waypoints" of the collection that contain information between the current
     * marker and the previous one.
     *
     * Markers are created by the class automatically whenever there are more than X number of bytes
     * between the previous marker and the latest insertion.
     *
     *                                                               'partial marker'
     *            |___________________|......|____________________|______
     *               Oldest Marker               Newest Marker
     *  Min rid  <-------------------------------------------------<------- Max rid
     *
     * A 'Marker' is not created until it is full or its creation is requested by a caller. A
     * 'partial marker' is not of type 'Marker', but rather metadata counting incoming records and
     * bytes until it can be used to construct a 'Marker'.
     *
     *                    Marker
     *             |__________________|
     *                          lastRecord
     */
    struct Marker {
        int64_t records;      // Approximate number of records between the current marker and the
                              // previous marker.
        int64_t bytes;        // Approximate size of records between the current marker and the
                              // previous marker.
        RecordId lastRecord;  // RecordId of the record that created this marker.
        Date_t wallTime;      // Walltime of the record that created this marker.

        Marker(int64_t records, int64_t bytes, RecordId lastRecord, Date_t wallTime)
            : records(records),
              bytes(bytes),
              lastRecord(std::move(lastRecord)),
              wallTime(wallTime) {}
    };


    /**
     * Builds the instance from an already computed set of markers.
     *
     * 'markers' becomes the deque of full markers (front = oldest, back = newest).
     * 'leftoverRecordsCount' and 'leftoverRecordsBytes' seed the counters of the partial marker
     * that is currently being filled. 'minBytesPerMarker' is the minimum number of bytes the
     * partial marker should contain before it gets added to the deque.
     */
    CollectionTruncateMarkers(std::deque<Marker> markers,
                              int64_t leftoverRecordsCount,
                              int64_t leftoverRecordsBytes,
                              int64_t minBytesPerMarker)
        : _minBytesPerMarker(minBytesPerMarker),
          _currentRecords(leftoverRecordsCount),
          _currentBytes(leftoverRecordsBytes),
          _markers(std::move(markers)) {}

    // This class is a polymorphic base ('_hasExcessMarkers' is pure virtual), so the destructor is
    // virtual to make destruction through a base-class pointer well defined.
    virtual ~CollectionTruncateMarkers() = default;

    /**
     * Whether the instance is going to get destroyed.
     */
    bool isDead();

    /**
     * Mark this instance as serving a non-existent RecordStore. This is the case if either the
     * RecordStore has been deleted or we're shutting down. Doing this will mark the instance as
     * ready for destruction.
     */
    void kill();

    // Waits until the instance has excess markers or is dead.
    // NOTE(review): presumably blocks on '_reclaimCv' and evaluates '_hasExcessMarkers'; confirm
    // against the implementation file.
    void awaitHasExcessMarkersOrDead(OperationContext* opCtx);

    // Returns a copy of the oldest marker if it should be reclaimed, boost::none otherwise.
    // NOTE(review): "needed" presumably means '_hasExcessMarkers' returned true; confirm against
    // the implementation file.
    boost::optional<Marker> peekOldestMarkerIfNeeded(OperationContext* opCtx) const;

    // Removes the oldest marker.
    // NOTE(review): presumably pops the front of '_markers'; confirm against the implementation.
    void popOldestMarker();

    // Requests the creation of a new marker whose last record is 'lastRecord' with wall time
    // 'wallTime'.
    // NOTE(review): presumably a marker is only created once the partial marker holds at least
    // '_minBytesPerMarker' bytes; confirm against the implementation file.
    void createNewMarkerIfNeeded(OperationContext* opCtx,
                                 const RecordId& lastRecord,
                                 Date_t wallTime);

    // Updates the current marker with the inserted value if the operation commits the WUOW.
    virtual void updateCurrentMarkerAfterInsertOnCommit(OperationContext* opCtx,
                                                        int64_t bytesInserted,
                                                        const RecordId& highestInsertedRecordId,
                                                        Date_t wallTime,
                                                        int64_t countInserted);

    // Clears all the markers of the instance whenever the current WUOW commits.
    void clearMarkersOnCommit(OperationContext* opCtx);

    // Updates the metadata about the collection markers after a rollback occurs.
    void updateMarkersAfterCappedTruncateAfter(int64_t recordsRemoved,
                                               int64_t bytesRemoved,
                                               const RecordId& firstRemovedId);

    // The method used for creating the initial set of markers.
    enum class MarkersCreationMethod { EmptyCollection, Scanning, Sampling };

    // The initial set of markers to use when constructing the CollectionMarkers object.
    struct InitialSetOfMarkers {
        std::deque<Marker> markers;        // Full markers, ready to be handed to the constructor.
        int64_t leftoverRecordsCount;      // Records not covered by any full marker.
        int64_t leftoverRecordsBytes;      // Bytes not covered by any full marker.
        Microseconds timeTaken;            // Time spent computing the markers.
        MarkersCreationMethod methodUsed;  // How the markers were computed.
    };

    // Pairs the RecordId of a record with its wall time. This is the result type of the
    // 'getRecordIdAndWallTime' callbacks accepted by the marker creation functions below.
    struct RecordIdAndWallTime {
        RecordId id;
        Date_t wall;

        // 'wallTime' is copied rather than moved, matching how 'Marker' treats its Date_t.
        RecordIdAndWallTime(RecordId lastRecord, Date_t wallTime)
            : id(std::move(lastRecord)), wall(wallTime) {}
    };

    // Creates the initial set of markers. This will decide whether to perform a collection scan or
    // sampling based on the size of the collection.
    //
    // 'numberOfMarkersToKeepLegacy' exists solely to maintain legacy behavior of
    // 'OplogTruncateMarkers' previously known as 'OplogStones'. It serves as the maximum number of
    // truncate markers to keep before reclaiming the oldest truncate markers.
    static InitialSetOfMarkers createFromExistingRecordStore(
        OperationContext* opCtx,
        RecordStore* rs,
        const NamespaceString& ns,
        int64_t minBytesPerMarker,
        std::function<RecordIdAndWallTime(const Record&)> getRecordIdAndWallTime,
        boost::optional<int64_t> numberOfMarkersToKeepLegacy = boost::none);

    // Creates the initial set of markers by fully scanning the collection. The set of markers
    // returned will have correct metrics.
    static InitialSetOfMarkers createMarkersByScanning(
        OperationContext* opCtx,
        RecordStore* rs,
        const NamespaceString& ns,
        int64_t minBytesPerMarker,
        std::function<RecordIdAndWallTime(const Record&)> getRecordIdAndWallTime);

    // Creates the initial set of markers by sampling the collection. The set of markers
    // returned will have approximate metrics. The metrics of each marker will be equal and contain
    // the collection's size and record count divided by the number of markers.
    static InitialSetOfMarkers createMarkersBySampling(
        OperationContext* opCtx,
        RecordStore* rs,
        const NamespaceString& ns,
        int64_t estimatedRecordsPerMarker,
        int64_t estimatedBytesPerMarker,
        std::function<RecordIdAndWallTime(const Record&)> getRecordIdAndWallTime);

    // Changes the minimum number of bytes a marker should contain.
    // NOTE(review): presumably assigns '_minBytesPerMarker'; confirm which lock, if any, guards
    // the update in the implementation file.
    void setMinBytesPerMarker(int64_t size);

    //
    // The following methods are public only for use in tests.
    //

    // Number of full markers currently held; takes '_markersMutex' while reading the deque.
    size_t numMarkers() const {
        stdx::lock_guard<Latch> lk(_markersMutex);
        return _markers.size();
    }

    // Number of bytes accumulated in the partial marker being filled.
    int64_t currentBytes() const {
        return _currentBytes.load();
    }

    // Number of records accumulated in the partial marker being filled.
    int64_t currentRecords() const {
        return _currentRecords.load();
    }

private:
    friend class CollectionTruncateMarkersWithPartialExpiration;

    // Used to decide whether the oldest marker has expired. Implementations are free to use
    // whichever process they want to discern if there are expired markers.
    // This method will get called holding the _collectionMarkersReclaimMutex and _markersMutex.
    virtual bool _hasExcessMarkers(OperationContext* opCtx) const = 0;

    static constexpr uint64_t kRandomSamplesPerMarker = 10;

    Mutex _collectionMarkersReclaimMutex =
        MONGO_MAKE_LATCH("CollectionTruncateMarkers::_collectionMarkersReclaimMutex");
    stdx::condition_variable _reclaimCv;

    // True once the RecordStore served by this instance no longer exists (it has been deleted or
    // we're shutting down), and false otherwise. See kill() and isDead().
    bool _isDead = false;

    // Minimum number of bytes the marker being filled should contain before it gets added to the
    // deque of collection markers.
    int64_t _minBytesPerMarker;

    AtomicWord<int64_t> _currentRecords;  // Number of records in the marker being filled.
    AtomicWord<int64_t> _currentBytes;    // Number of bytes in the marker being filled.

    // Protects against concurrent access to the deque of collection markers.
    mutable Mutex _markersMutex = MONGO_MAKE_LATCH("CollectionTruncateMarkers::_markersMutex");
    std::deque<Marker> _markers;  // front = oldest, back = newest.

protected:
    // Move construction is restricted to subclasses; the definition lives in the implementation
    // file.
    CollectionTruncateMarkers(CollectionTruncateMarkers&& other);

    // Gives subclasses read access to the deque of markers. This accessor does not acquire
    // '_markersMutex' itself.
    // NOTE(review): callers presumably must provide their own synchronization; confirm.
    const std::deque<Marker>& getMarkers() const {
        return _markers;
    }

    // NOTE(review): presumably wakes up the thread waiting in awaitHasExcessMarkersOrDead() by
    // notifying '_reclaimCv'; confirm against the implementation file.
    void pokeReclaimThread(OperationContext* opCtx);

    // Creates a marker whose last record is 'lastRecord' with wall time 'wallTime' and returns a
    // reference to it.
    // NOTE(review): presumably appends to the back of '_markers' and consumes the partial marker
    // counters; confirm locking requirements against the implementation file.
    Marker& createNewMarker(const RecordId& lastRecord, Date_t wallTime);
};

/**
 * An extension of CollectionTruncateMarkers that provides support for creating "partial markers".
 *
 * Partial markers are normal markers that can be requested by the user calling
 * CollectionTruncateMarkersWithPartialExpiration::createPartialMarkerIfNecessary. The
 * implementation will then consider whether the current data awaiting a marker should be deleted
 * according to some internal logic. This is useful in time-based expiration systems as there could
 * be low activity collections containing data that should be expired but won't because there is no
 * marker.
 */
class CollectionTruncateMarkersWithPartialExpiration : public CollectionTruncateMarkers {
public:
    CollectionTruncateMarkersWithPartialExpiration(std::deque<Marker> markers,
                                                   int64_t leftoverRecordsCount,
                                                   int64_t leftoverRecordsBytes,
                                                   int64_t minBytesPerMarker)
        : CollectionTruncateMarkers(
              std::move(markers), leftoverRecordsCount, leftoverRecordsBytes, minBytesPerMarker) {}

    // Creates a partially filled marker if necessary. The criteria used is whether there is data in
    // the partial marker and whether the implementation's '_hasPartialMarkerExpired' returns true.
    void createPartialMarkerIfNecessary(OperationContext* opCtx);

    virtual void updateCurrentMarkerAfterInsertOnCommit(OperationContext* opCtx,
                                                        int64_t bytesInserted,
                                                        const RecordId& highestInsertedRecordId,
                                                        Date_t wallTime,
                                                        int64_t countInserted) final;

private:
    // Highest marker seen during the lifetime of the class. Modifications must happen
    // while holding '_lastHighestRecordMutex'.
    mutable Mutex _lastHighestRecordMutex =
        MONGO_MAKE_LATCH("CollectionTruncateMarkersWithPartialExpiration::_lastHighestRecordMutex");
    RecordId _lastHighestRecordId;
    Date_t _lastHighestWallTime;

    // Replaces the highest marker if _isMarkerLargerThanHighest returns true.
    void _replaceNewHighestMarkingIfNecessary(const RecordId& newMarkerRecordId,
                                              Date_t newMarkerWallTime);

    // Used to decide if the current partially built marker has expired.
    virtual bool _hasPartialMarkerExpired(OperationContext* opCtx) const {
        return false;
    }

protected:
    CollectionTruncateMarkersWithPartialExpiration(
        CollectionTruncateMarkersWithPartialExpiration&& other);

    std::pair<const RecordId&, const Date_t&> getPartialMarker() const {
        return {_lastHighestRecordId, _lastHighestWallTime};
    }
};

}  // namespace mongo