// src/mongo/db/storage/storage_engine.h

/**
 *    Copyright (C) 2018-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "mongo/base/status.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/timestamp.h"
#include "mongo/db/catalog/index_builds.h"
#include "mongo/db/resumable_index_builds_gen.h"
#include "mongo/db/storage/temporary_record_store.h"
#include "mongo/util/functional.h"
#include "mongo/util/str.h"

namespace mongo {

class JournalListener;
class DurableCatalog;
class KVEngine;
class OperationContext;
class RecoveryUnit;
class SnapshotManager;
class StorageEngineLockFile;
class StorageEngineMetadata;

struct StorageGlobalParams;

/**
 * The StorageEngine class is the top level interface for creating a new storage engine. All
 * StorageEngine(s) must be registered by calling registerFactory in order to possibly be
 * activated.
 */
class StorageEngine {
public:
    /**
     * This is the minimum valid timestamp; it can be used for reads that need to see all
     * untimestamped data but no timestamped data. We cannot use 0 here because 0 means see all
     * timestamped data.
     *
     * Declared constexpr rather than plain const: under C++17 a constexpr static data member is
     * implicitly inline, so the constant can be odr-used (for example bound to a const reference)
     * without relying on a separate out-of-class definition.
     */
    static constexpr uint64_t kMinimumTimestamp = 1;

    /**
     * When the storage engine needs to know how much oplog to preserve for the sake of active
     * transactions, it executes a callback that returns either the oldest active transaction
     * timestamp, or boost::none if there is no active transaction, or an error if it fails.
     */
    using OldestActiveTransactionTimestampResult = StatusWith<boost::optional<Timestamp>>;
    using OldestActiveTransactionTimestampCallback =
        std::function<OldestActiveTransactionTimestampResult(Timestamp stableTimestamp)>;

    /**
     * Interface through which new storage engine instances are created.
     *
     * Each storage engine supplies an instance of this class, together with an associated name,
     * to the global environment; the global storage engine is then selected according to the
     * provided configuration parameter.
     */
    class Factory {
    public:
        virtual ~Factory() = default;

        /**
         * Returns a new instance of the StorageEngine. The lockFile parameter may be null if
         * params.readOnly is set. Ownership of the new engine passes to the caller through the
         * returned std::unique_ptr.
         */
        virtual std::unique_ptr<StorageEngine> create(
            const StorageGlobalParams& params, const StorageEngineLockFile* lockFile) const = 0;

        /**
         * Returns the name of the storage engine.
         *
         * The returned string must stay stable: an implementation that changes it can cause
         * data file incompatibilities.
         */
        virtual StringData getCanonicalName() const = 0;

        /**
         * Checks the storage-engine-specific options supplied when creating a collection.
         * Returns Status::OK() when they are acceptable and an InvalidOptions error otherwise.
         *
         * This default implementation accepts only an empty object, i.e. no options at all.
         */
        virtual Status validateCollectionStorageOptions(const BSONObj& options) const {
            if (!options.isEmpty()) {
                return Status(ErrorCodes::InvalidOptions,
                              str::stream()
                                  << "storage engine " << getCanonicalName()
                                  << " does not support any collection storage options");
            }
            return Status::OK();
        }

        /**
         * Checks the storage-engine-specific options supplied when creating an index.
         * Returns Status::OK() when they are acceptable and an InvalidOptions error otherwise.
         *
         * This default implementation accepts only an empty object, i.e. no options at all.
         */
        virtual Status validateIndexStorageOptions(const BSONObj& options) const {
            if (!options.isEmpty()) {
                return Status(ErrorCodes::InvalidOptions,
                              str::stream()
                                  << "storage engine " << getCanonicalName()
                                  << " does not support any index storage options");
            }
            return Status::OK();
        }

        /**
         * Validates the metadata already present in the data directory against the current
         * startup options. Returns an error if storage engine initialization should not proceed
         * because the startup options are inconsistent with the creation options recorded in the
         * metadata.
         */
        virtual Status validateMetadata(const StorageEngineMetadata& metadata,
                                        const StorageGlobalParams& params) const = 0;

        /**
         * Returns a new document suitable for storing in the data directory metadata.
         * validateMetadata() uses this document to check the startup options on restart.
         */
        virtual BSONObj createMetadataOptions(const StorageGlobalParams& params) const = 0;

        /**
         * Returns whether the engine supports read-only mode. If read-only mode is enabled, the
         * engine may be started on a read-only filesystem (either mounted read-only or with
         * read-only permissions). If readOnly mode is enabled, it is undefined behavior to call
         * methods that write data (e.g. insertRecord). This method lives on the Factory because
         * it must be callable before the storageEngine is instantiated.
         *
         * The default implementation reports no read-only support.
         */
        virtual bool supportsReadOnly() const {
            return false;
        }
    };

    /**
     * Destroys the storage engine. The destructor should only run when the engine is being torn
     * down while the process keeps running, not as part of exiting the process.
     */
    virtual ~StorageEngine() = default;

    /**
     * Called after the globalStorageEngine pointer has been set up, before any other methods
     * are called. Any initialization work that requires the ability to create OperationContexts
     * should be done here rather than in the constructor.
     */
    virtual void finishInit() = 0;

    /**
     * During the startup process, the storage engine is one of the first components to be started
     * up and fully initialized. Full initialization of the engine does not, however, mark the end
     * of storage startup: other storage startup tasks may still need to be performed.
     *
     * For example, after the storage engine has been fully initialized, we need to access it in
     * order to set up all of the collections and indexes based on the metadata, or perform some
     * corrective measures on the data files, etc.
     *
     * When all of the storage startup tasks are completed as a whole, then this function is called
     * by the external force managing the startup process.
     *
     * The default implementation does nothing.
     */
    virtual void notifyStartupComplete() {}

    /**
     * Returns a new interface to the storage engine's recovery unit.  The recovery
     * unit is the durability interface.  For details, see recovery_unit.h
     *
     * Caller owns the returned pointer.
     */
    virtual RecoveryUnit* newRecoveryUnit() = 0;

    /**
     * List the databases stored in this storage engine.
     */
    virtual std::vector<std::string> listDatabases() const = 0;

    /**
     * Returns whether the storage engine supports capped collections.
     */
    virtual bool supportsCappedCollections() const = 0;

    /**
     * Returns whether the storage engine supports checkpoints.
     */
    virtual bool supportsCheckpoints() const = 0;

    /**
     * Returns whether the engine supports a journalling concept or not.
     */
    virtual bool isDurable() const = 0;

    /**
     * Returns true if the engine does not persist data to disk; false otherwise.
     */
    virtual bool isEphemeral() const = 0;

    /**
     * Populates and tears down in-memory data structures, respectively. Only required for storage
     * engines that support recoverToStableTimestamp().
     *
     * Must be called with the global lock acquired in exclusive mode.
     *
     * Unrecognized idents require special handling based on the context known only to the
     * caller. For example, on starting from a previous unclean shutdown, we may try to recover
     * orphaned idents, which are known to the storage engine but not referenced in the catalog.
     */
    virtual void loadCatalog(OperationContext* opCtx, bool loadingFromUncleanShutdown) = 0;
    virtual void closeCatalog(OperationContext* opCtx) = 0;

    /**
     * Closes all file handles associated with a database.
     */
    virtual Status closeDatabase(OperationContext* opCtx, StringData db) = 0;

    /**
     * Deletes all data and metadata for a database.
     */
    virtual Status dropDatabase(OperationContext* opCtx, StringData db) = 0;

    /**
     * Checkpoints the data to disk.
     *
     * 'callerHoldsReadLock' signals whether the caller holds a read lock. A write lock may be taken
     * internally, but will be skipped for callers holding a read lock because a write lock would
     * conflict. The JournalListener will not be updated in this case.
     */
    virtual void flushAllFiles(OperationContext* opCtx, bool callerHoldsReadLock) = 0;

    /**
     * Transitions the storage engine into backup mode.
     *
     * During backup mode the storage engine must stabilize its on-disk files, and avoid
     * any internal processing that may involve file I/O, such as online compaction, so
     * a filesystem level backup may be performed.
     *
     * Storage engines that do not support this feature should use the default implementation.
     * Storage engines that implement this must also implement endBackup().
     *
     * For Storage engines that implement beginBackup the _inBackupMode variable is provided
     * to avoid multiple instances entering/leaving backup concurrently.
     *
     * If this function returns an OK status, MongoDB can call endBackup to signal the storage
     * engine that filesystem writes may continue. This function should return a non-OK status if
     * filesystem changes cannot be stopped to allow for online backup. If the function should be
     * retried, returns a non-OK status. This function may throw a WriteConflictException, which
     * should trigger a retry by the caller. All other exceptions should be treated as errors.
     */
    virtual Status beginBackup(OperationContext* opCtx) = 0;

    /**
     * Transitions the storage engine out of backup mode.
     *
     * Storage engines that do not support this feature should use the default implementation.
     *
     * Storage engines implementing this feature should fassert when unable to leave backup mode.
     */
    virtual void endBackup(OperationContext* opCtx) = 0;

    /**
     * Disables the storage of incremental backup history until a subsequent incremental backup
     * cursor is requested.
     *
     * The storage engine must release all incremental backup information and resources.
     */
    virtual Status disableIncrementalBackup(OperationContext* opCtx) = 0;

    /**
     * Options a storage engine may consult when taking full and incremental backups.
     *
     * Full backup (incrementalBackup=false): the values of 'blockSizeMB', 'thisBackupName', and
     * 'srcBackupName' should not be modified.
     *
     * Incremental backup (incrementalBackup=true): a basis for future incremental backups is
     * needed first. That first basis (named 'thisBackupName') is itself a full backup; it must
     * pass incrementalBackup=true and should not set 'srcBackupName'. An incremental backup
     * includes the blocks changed since 'srcBackupName' was taken, and that backup (also named
     * 'thisBackupName') then becomes the basis for future incremental backups.
     *
     * In summary: 'thisBackupName' must exist if and only if incrementalBackup=true, while
     * 'srcBackupName' must not exist if incrementalBackup=false but may or may not exist if
     * incrementalBackup=true.
     */
    struct BackupOptions {
        // NOTE(review): not covered by the contract above; presumably requests the behaviour of
        // StorageEngine::disableIncrementalBackup() -- confirm against callers.
        bool disableIncrementalBackup{false};

        // Selects between a full backup (false) and an incremental backup (true).
        bool incrementalBackup{false};

        // Block size in MB; defaults to 16.
        int blockSizeMB{16};

        // Name given to the backup being taken; unset by default.
        boost::optional<std::string> thisBackupName;

        // Name of the earlier backup that an incremental backup is taken against; unset by
        // default.
        boost::optional<std::string> srcBackupName;
    };

    /**
     * One file block handed back by the storage engine during a full or an incremental backup.
     *
     * - Full backup: each block is an entire file, with offset=0 and length=fileSize.
     * - First basis for future incremental backups: each block is an entire file, with offset=0
     *   and length=0.
     * - Subsequent incremental backup: each block reflects changes made to data files since the
     *   basis (named 'thisBackupName'), and each block has a maximum size of 'blockSizeMB'.
     *
     * If a file is unchanged in a subsequent incremental backup, a single block is returned with
     * offset=0 and length=0. This allows consumers of the backup API to safely truncate files that
     * are not returned by the backup cursor.
     */
    struct BackupBlock {
        // Name of the file this block belongs to.
        std::string filename;

        // Position of the block within the file; 0 by default.
        std::uint64_t offset{0};

        // Extent of the block; 0 by default.
        std::uint64_t length{0};

        // Size of the file the block belongs to; 0 by default.
        std::uint64_t fileSize{0};
    };

    /**
     * Abstract class required for streaming both full and incremental backups. The function
     * getNextBatch() returns a vector containing 'batchSize' or fewer BackupBlocks. The
     * StreamingCursor has been exhausted if getNextBatch() returns an empty vector.
     */
    class StreamingCursor {
    public:
        StreamingCursor() = delete;

        /**
         * Stores the BackupOptions this cursor was opened with. 'options' is a by-value sink
         * parameter, so it is moved into the member to avoid copying its optional strings a
         * second time.
         */
        explicit StreamingCursor(BackupOptions options) : options(std::move(options)) {}

        virtual ~StreamingCursor() = default;

        /**
         * Returns the next batch of at most 'batchSize' BackupBlocks, or a non-OK status on
         * failure. An empty vector signals that the cursor is exhausted.
         */
        virtual StatusWith<std::vector<BackupBlock>> getNextBatch(const std::size_t batchSize) = 0;

    protected:
        // Options supplied at construction; available to derived cursors.
        BackupOptions options;
    };

    /**
     * Begins a backup described by 'options'. On success, returns a StreamingCursor from which
     * the backup's BackupBlocks are read in batches via StreamingCursor::getNextBatch(); on
     * failure, returns a non-OK status.
     *
     * NOTE(review): the name suggests that, unlike beginBackup(), this does not block writes for
     * the duration of the backup, and that it is paired with endNonBlockingBackup() -- confirm
     * against the implementations.
     */
    virtual StatusWith<std::unique_ptr<StreamingCursor>> beginNonBlockingBackup(
        OperationContext* opCtx, const BackupOptions& options) = 0;

    /**
     * NOTE(review): presumably ends the backup started by beginNonBlockingBackup() and releases
     * whatever it held open -- confirm against the implementations.
     */
    virtual void endNonBlockingBackup(OperationContext* opCtx) = 0;

    /**
     * Returns a list of strings on success, or a non-OK status on failure.
     *
     * NOTE(review): presumably the strings name additional files that must be copied to extend
     * an already open backup -- confirm against the implementations.
     */
    virtual StatusWith<std::vector<std::string>> extendBackupCursor(OperationContext* opCtx) = 0;

    /**
     * Recover as much data as possible from a potentially corrupt RecordStore.
     * This only recovers the record data, not indexes or anything else.
     *
     * The Collection object for this namespace will be destructed and invalidated. A new
     * Collection object will be created and it should be retrieved from the CollectionCatalog.
     */
    virtual Status repairRecordStore(OperationContext* opCtx,
                                     RecordId catalogId,
                                     const NamespaceString& nss) = 0;

    /**
     * Creates a temporary RecordStore on the storage engine. On startup after an unclean shutdown,
     * the storage engine will drop any un-dropped temporary record stores.
     */
    virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStore(
        OperationContext* opCtx) = 0;

    /**
     * Creates a temporary RecordStore on the storage engine from an existing ident on disk. On
     * startup after an unclean shutdown, the storage engine will drop any un-dropped temporary
     * record stores.
     */
    virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreFromExistingIdent(
        OperationContext* opCtx, StringData ident) = 0;

    /**
     * This method will be called before there is a clean shutdown.  Storage engines should
     * override this method if they have clean-up to do that is different from unclean shutdown.
     * MongoDB will not call into the storage subsystem after calling this function.
     *
     * On error, the storage engine should assert and crash.
     * There is intentionally no uncleanShutdown().
     */
    virtual void cleanShutdown() = 0;

    /**
     * Returns the SnapshotManager for this StorageEngine or NULL if not supported.
     *
     * Pointer remains owned by the StorageEngine, not the caller.
     */
    virtual SnapshotManager* getSnapshotManager() const = 0;

    /**
     * Sets a new JournalListener, which is used by the storage engine to alert the rest of the
     * system about journaled write progress.
     *
     * This may only be set once.
     */
    virtual void setJournalListener(JournalListener* jl) = 0;

    /**
     * Returns whether the storage engine supports "recover to stable timestamp". Returns true
     * if the storage engine supports "recover to stable timestamp" but does not currently have
     * a stable timestamp. In that case StorageEngine::recoverToStableTimestamp() will return
     * a bad status.
     */
    virtual bool supportsRecoverToStableTimestamp() const = 0;

    /**
     * Returns whether the storage engine can provide a recovery timestamp.
     */
    virtual bool supportsRecoveryTimestamp() const = 0;

    /**
     * Returns true if the storage engine supports the readConcern level "snapshot".
     */
    virtual bool supportsReadConcernSnapshot() const = 0;

    /**
     * Returns true if the storage engine supports the readConcern level "majority".
     * NOTE(review): wording mirrors supportsReadConcernSnapshot(); confirm the exact contract.
     */
    virtual bool supportsReadConcernMajority() const = 0;

    /**
     * Returns true if the storage engine uses oplog stones to more finely control
     * deletion of oplog history, instead of the standard capped collection controls on
     * the oplog collection size.
     */
    virtual bool supportsOplogStones() const = 0;

    /**
     * Returns whether the storage engine supports resumable index builds.
     * NOTE(review): presumably relates to ReconcileResult::indexBuildsToResume -- confirm.
     */
    virtual bool supportsResumableIndexBuilds() const = 0;

    /**
     * Returns true if the storage engine supports deferring collection drops until the storage
     * engine determines that the storage layer artifacts for the pending drops are no longer needed
     * based on the stable and oldest timestamps.
     */
    virtual bool supportsPendingDrops() const = 0;

    /**
     * Returns a set of drop pending idents inside the storage engine.
     */
    virtual std::set<std::string> getDropPendingIdents() const = 0;

    /**
     * Clears list of drop-pending idents in the storage engine.
     * Used primarily by rollback after recovering to a stable timestamp.
     */
    virtual void clearDropPendingState() = 0;

    /**
     * Adds 'ident' to a list of indexes/collections whose data will be dropped when:
     * - the 'dropTimestamp' is sufficiently old to ensure no future data accesses
     * - and no holders of 'ident' remain (the index/collection is no longer in active use)
     */
    virtual void addDropPendingIdent(const Timestamp& dropTimestamp,
                                     const NamespaceString& nss,
                                     std::shared_ptr<Ident> ident) = 0;

    /**
     * Recovers the storage engine state to the last stable timestamp. "Stable" in this case
     * refers to a timestamp that is guaranteed to never be rolled back. The stable timestamp
     * used should be one provided by StorageEngine::setStableTimestamp().
     *
     * The "local" database is exempt and should not roll back any state except for
     * "local.replset.minvalid" which must roll back to the last stable timestamp.
     *
     * If successful, returns the timestamp that the storage engine recovered to.
     *
     * fasserts if StorageEngine::supportsRecoverToStableTimestamp() would return
     * false. Returns a bad status if there is no stable timestamp to recover to.
     *
     * It is illegal to call this concurrently with `setStableTimestamp` or
     * `setInitialDataTimestamp`.
     */
    virtual StatusWith<Timestamp> recoverToStableTimestamp(OperationContext* opCtx) = 0;

    /**
     * Returns the stable timestamp that the storage engine recovered to on startup. If the
     * recovery point was not stable, returns "none".
     * fasserts if StorageEngine::supportsRecoverToStableTimestamp() would return false.
     */
    virtual boost::optional<Timestamp> getRecoveryTimestamp() const = 0;

    /**
     * Returns a timestamp that is guaranteed to exist on storage engine recovery to a stable
     * timestamp. This indicates when the storage engine can safely rollback to stable; and for
     * durable engines, it is also the guaranteed minimum stable recovery point on server restart
     * after crash or shutdown.
     *
     * fasserts if StorageEngine::supportsRecoverToStableTimestamp() would return false. Returns
     * boost::none if the recovery time has not yet been established. Replication recoverable
     * rollback may not succeed before establishment, and restart will require resync.
     */
    virtual boost::optional<Timestamp> getLastStableRecoveryTimestamp() const = 0;

    /**
     * Sets the highest timestamp at which the storage engine is allowed to take a checkpoint. This
     * timestamp must not decrease unless force=true is set, in which case we force the stable
     * timestamp, the oldest timestamp, and the commit timestamp backward.
     */
    virtual void setStableTimestamp(Timestamp stableTimestamp, bool force = false) = 0;

    /**
     * Tells the storage engine the timestamp of the data at startup. This is necessary because
     * timestamps are not persisted in the storage layer.
     */
    virtual void setInitialDataTimestamp(Timestamp timestamp) = 0;

    /**
     * Returns the initial data timestamp.
     */
    virtual Timestamp getInitialDataTimestamp() const = 0;

    /**
     * Uses the current stable timestamp to set the oldest timestamp for which the storage engine
     * must maintain snapshot history through.
     *
     * oldest_timestamp will be set to stable_timestamp adjusted by
     * 'minSnapshotHistoryWindowInSeconds' to create a window of available snapshots on the
     * storage engine from oldest to stable. Furthermore, oldest_timestamp will never be set ahead
     * of the oplog read timestamp, ensuring the oplog reader's 'read_timestamp' can always be
     * serviced.
     */
    virtual void setOldestTimestampFromStable() = 0;

    /**
     * Sets the oldest timestamp for which the storage engine must maintain snapshot history
     * through. Additionally, all future writes must be newer or equal to this value.
     */
    virtual void setOldestTimestamp(Timestamp timestamp) = 0;

    /**
     * Gets the oldest timestamp for which the storage engine must maintain snapshot history
     * through.
     */
    virtual Timestamp getOldestTimestamp() const = 0;

    /**
     * Sets a callback which returns the timestamp of the oldest oplog entry involved in an
     * active MongoDB transaction. The storage engine calls this function to determine how much
     * oplog it must preserve.
     */
    virtual void setOldestActiveTransactionTimestampCallback(
        OldestActiveTransactionTimestampCallback callback) = 0;

    /**
     * Identifies a single index. Used as the element type of ReconcileResult::indexesToRebuild.
     * All fields are const, so an IndexIdentifier cannot be modified after construction.
     */
    struct IndexIdentifier {
        // NOTE(review): presumably the catalog RecordId of the collection owning the index --
        // confirm.
        const RecordId catalogId;
        // Namespace associated with the index.
        const NamespaceString nss;
        // Name of the index.
        const std::string indexName;
    };

    /**
     * ReconcileResult is the result of reconciling abandoned storage engine idents and unfinished
     * index builds. It is the value carried by the StatusWith that reconcileCatalogAndIdents()
     * returns.
     */
    struct ReconcileResult {
        // A list of IndexIdentifiers that must be rebuilt to completion.
        std::vector<IndexIdentifier> indexesToRebuild;

        // A map of unfinished two-phase indexes that must be restarted in the background, but
        // not to completion; they will wait for replicated commit or abort operations. This is a
        // mapping from index build UUID to index build.
        IndexBuilds indexBuildsToRestart;

        // List of index builds to be resumed. Each ResumeIndexInfo may contain multiple indexes to
        // resume as part of the same build.
        std::vector<ResumeIndexInfo> indexBuildsToResume;
    };

    /**
     * Drop abandoned idents. If successful, returns a ReconcileResult with indexes that need to be
     * rebuilt or builds that need to be restarted.
     *
     * Abandoned internal idents require special handling based on the context known only to the
     * caller. For example, on starting from a previous unclean shutdown, we would always drop all
     * unknown internal idents. If we started from a clean shutdown, the internal idents may contain
     * information for resuming index builds.
     */
    enum class InternalIdentReconcilePolicy { kDrop, kRetain };
    virtual StatusWith<ReconcileResult> reconcileCatalogAndIdents(
        OperationContext* opCtx, InternalIdentReconcilePolicy internalIdentReconcilePolicy) = 0;

    /**
     * Returns the all_durable timestamp. All transactions with timestamps earlier than the
     * all_durable timestamp are committed.
     *
     * The all_durable timestamp is the in-memory no holes point. That does not mean that there are
     * no holes behind it on disk. The all_durable timestamp also might not correspond with any
     * oplog entry, but instead have a timestamp value between that of two oplog entries.
     *
     * The all_durable timestamp only includes non-prepared transactions that have been given a
     * commit_timestamp and prepared transactions that have been given a durable_timestamp.
     * Previously, the deprecated all_committed timestamp would also include prepared transactions
     * that were prepared but not committed which could make the stable timestamp briefly jump back.
     *
     * Returns kMinimumTimestamp if there have been no new writes since the storage engine started.
     */
    virtual Timestamp getAllDurableTimestamp() const = 0;

    /**
     * Returns the oldest read timestamp in use by an open transaction. Storage engines that support
     * the 'snapshot' ReadConcern must provide an implementation. Other storage engines may provide
     * a no-op implementation.
     */
    virtual Timestamp getOldestOpenReadTimestamp() const = 0;

    /**
     * Returns the minimum possible Timestamp value in the oplog that replication may need for
     * recovery in the event of a crash.
     *
     * Returns boost::none when called on an ephemeral database.
     */
    virtual boost::optional<Timestamp> getOplogNeededForCrashRecovery() const = 0;

    /**
     * Returns the path to the directory which has the data files of database with `dbName`.
     */
    virtual std::string getFilesystemPathForDb(const std::string& dbName) const = 0;

    /**
     * Returns whether the data files are compatible with the current code:
     *
     *   - Status::OK() if the data files are compatible with the current code.
     *
     *   - ErrorCodes::CanRepairToDowngrade if the data files are incompatible with the current
     *     code, but a --repair would make them compatible. For example, when rebuilding all indexes
     *     in the data files would resolve the incompatibility.
     *
     *   - ErrorCodes::MustUpgrade if the data files are incompatible with the current code and a
     *     newer version is required to start up.
     */
    virtual Status currentFilesCompatible(OperationContext* opCtx) const = 0;

    /**
     * NOTE(review): presumably returns the on-disk size of the database 'dbName'; the unit
     * (likely bytes) and the behaviour for an unknown database are not stated here -- confirm
     * against the implementations.
     */
    virtual int64_t sizeOnDiskForDb(OperationContext* opCtx, StringData dbName) = 0;

    /**
     * Accessors for the KVEngine associated with this StorageEngine, in mutable and const
     * flavours.
     * NOTE(review): ownership and nullability of the returned pointer are not stated here;
     * presumably it remains owned by the StorageEngine -- confirm.
     */
    virtual KVEngine* getEngine() = 0;
    virtual const KVEngine* getEngine() const = 0;
    /**
     * Accessors for the DurableCatalog associated with this StorageEngine, in mutable and const
     * flavours.
     * NOTE(review): ownership and nullability of the returned pointer are not stated here;
     * presumably it remains owned by the StorageEngine -- confirm.
     */
    virtual DurableCatalog* getCatalog() = 0;
    virtual const DurableCatalog* getCatalog() const = 0;
};

}  // namespace mongo