summaryrefslogtreecommitdiff
path: root/src/mongo/db/storage/recovery_unit.h
blob: 20c49c637d316dded1ffd211af9e74fe4b977f29 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
/**
 *    Copyright (C) 2018-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include <cstdint>
#include <stdlib.h>
#include <string>

#include "mongo/base/status.h"
#include "mongo/bson/timestamp.h"
#include "mongo/db/repl/read_concern_level.h"
#include "mongo/db/storage/snapshot.h"
#include "mongo/util/decorable.h"

namespace mongo {

class BSONObjBuilder;
class OperationContext;

/**
 * Selects how an operation reacts when it encounters a prepare conflict.
 */
enum class PrepareConflictBehavior {
    /**
     * Block on a prepare conflict until that conflict has been resolved.
     */
    kEnforce,

    /**
     * Do not block on prepare conflicts.
     *
     * A record that has a prepared update is returned in its previous version. If the prepared
     * update commits while the snapshot is still open, that same snapshot can therefore observe
     * two different versions of the record. Operations using this mode may consequently only
     * perform reads: a write could be based on an older version of the record being modified,
     * and no write conflict would be generated to catch that.
     */
    kIgnoreConflicts,

    /**
     * Do not block on prepare conflicts and, as an exception to the rule of kIgnoreConflicts,
     * allow the operation to perform writes.
     *
     * Only use this where it is known to be impossible for the operation to perform writes based
     * on other prepared updates.
     */
    kIgnoreConflictsAllowWrites,
};

/**
 * Abstract interface for storage statistics. Implementations can render their statistics in the
 * BSON format, add another set of statistics values to their own, and hand out a copy of
 * themselves.
 */
class StorageStats {
    // Copy construction and copy assignment are disabled; getCopy() is the way to duplicate.
    StorageStats(const StorageStats&) = delete;
    StorageStats& operator=(const StorageStats&) = delete;

public:
    StorageStats() = default;
    virtual ~StorageStats() = default;

    /**
     * Returns the storage statistics as a BSONObj.
     */
    virtual BSONObj toBSON() = 0;

    /**
     * Adds the statistics values of the given StorageStats and returns a StorageStats reference.
     */
    virtual StorageStats& operator+=(const StorageStats&) = 0;

    /**
     * Returns a shared_ptr to a StorageStats. This exists so that an instance of this class can
     * be created outside of the storage integration layer.
     */
    virtual std::shared_ptr<StorageStats> getCopy() = 0;
};


/**
 * A RecoveryUnit is responsible for ensuring that data is persisted.
 * All on-disk information must be mutated through this interface.
 */
class RecoveryUnit : public Decorable<RecoveryUnit> {
    RecoveryUnit(const RecoveryUnit&) = delete;
    RecoveryUnit& operator=(const RecoveryUnit&) = delete;

public:
    /**
     * Selects what abandonSnapshot() does with an open transaction.
     */
    enum class AbandonSnapshotMode {
        kAbort,  // The default.
        kCommit,
    };

    // NOTE(review): only declared in this header. Presumably runs commit() on every registered
    // Change, handing each one 'commitTimestamp' - confirm against the out-of-line definition.
    void commitRegisteredChanges(boost::optional<Timestamp> commitTimestamp);
    // NOTE(review): only declared in this header. Presumably runs rollback() on every registered
    // Change - confirm against the out-of-line definition.
    void abortRegisteredChanges();
    // Virtual because RecoveryUnit declares virtual member functions that other classes override.
    virtual ~RecoveryUnit() = default;

    /**
     * Marks the beginning of a unit of work. Each call must be matched with exactly one call to
     * either commitUnitOfWork or abortUnitOfWork.
     *
     * When called with readOnly=true, no unit of work is started. Calling commitUnitOfWork or
     * abortUnitOfWork will invariant.
     *
     * Should be called through WriteUnitOfWork rather than directly.
     */
    void beginUnitOfWork(bool readOnly);

    /**
     * Marks the end of a unit of work and commits all changes registered by calls to onCommit or
     * registerChange, in order. Must be matched by exactly one preceding call to beginUnitOfWork.
     *
     * Should be called through WriteUnitOfWork rather than directly.
     */
    void commitUnitOfWork();

    /**
     * Marks the end of a unit of work and rolls back all changes registered by calls to onRollback
     * or registerChange, in reverse order. Must be matched by exactly one preceding call to
     * beginUnitOfWork.
     *
     * Should be called through WriteUnitOfWork rather than directly.
     */
    void abortUnitOfWork();

    /**
     * Cleans up any state set for this unit of work.
     *
     * Should be called through WriteUnitOfWork rather than directly.
     */
    void endReadOnlyUnitOfWork();

    /**
     * Transitions the active unit of work to the "prepared" state. Must be called after
     * beginUnitOfWork and before calling either abortUnitOfWork or commitUnitOfWork. Must be
     * overridden by storage engines that support prepared transactions.
     *
     * Must be preceded by a call to beginUnitOfWork and setPrepareTimestamp, in that order.
     *
     * This cannot be called after setTimestamp or setCommitTimestamp.
     *
     * The default implementation calls uasserted() with ErrorCodes::CommandNotSupported.
     */
    virtual void prepareUnitOfWork() {
        uasserted(ErrorCodes::CommandNotSupported,
                  "This storage engine does not support prepared transactions");
    }

    /**
     * Sets the behavior of handling conflicts that are encountered due to prepared transactions, if
     * supported by this storage engine. See PrepareConflictBehavior.
     *
     * The default implementation is a no-op: 'behavior' is ignored.
     */
    virtual void setPrepareConflictBehavior(PrepareConflictBehavior behavior) {}

    /**
     * Returns the behavior of handling conflicts that are encountered due to prepared transactions.
     * Defaults to kEnforce if prepared transactions are not supported by this storage engine.
     *
     * The default implementation returns PrepareConflictBehavior::kEnforce unconditionally.
     */
    virtual PrepareConflictBehavior getPrepareConflictBehavior() const {
        return PrepareConflictBehavior::kEnforce;
    }

    /**
     * Dictates whether to round up prepare and commit timestamp of a prepared transaction. If set
     * to true, the prepare timestamp will be rounded up to the oldest timestamp if found to be
     * earlier; and the commit timestamp will be rounded up to the prepare timestamp if found to
     * be earlier.
     *
     * This must be called before a transaction begins, and defaults to false. On transaction close,
     * we reset the value to its default.
     *
     * The default implementation is a no-op: 'value' is ignored.
     */
    virtual void setRoundUpPreparedTimestamps(bool value) {}

    /**
     * Waits until all commits that happened before this call are durable in the journal. Returns
     * true, unless the storage engine cannot guarantee durability, which should never happen when
     * the engine is non-ephemeral. This cannot be called from inside a unit of work, and should
     * fail if it is. This method invariants if the caller holds any locks, except for repair.
     *
     * Can throw write interruption errors from the JournalListener.
     */
    virtual bool waitUntilDurable(OperationContext* opCtx) = 0;

    /**
     * Unlike `waitUntilDurable`, this method takes a stable checkpoint, making durable any writes
     * on unjournaled tables that are behind the current stable timestamp. If the storage engine
     * is starting from an "unstable" checkpoint or 'stableCheckpoint'=false, this method call will
     * turn into an unstable checkpoint.
     *
     * This must not be called by a system taking user writes until after a stable timestamp is
     * passed to the storage engine.
     *
     * The default implementation ignores 'stableCheckpoint' and returns the result of
     * waitUntilDurable(opCtx).
     */
    virtual bool waitUntilUnjournaledWritesDurable(OperationContext* opCtx, bool stableCheckpoint) {
        return waitUntilDurable(opCtx);
    }

    /**
     * If there is an open transaction, it is closed. If the current AbandonSnapshotMode is
     * 'kAbort', the transaction is aborted. If the mode is 'kCommit' the transaction is committed,
     * and all data currently pointed to by cursors remains pinned until the cursors are
     * repositioned.
     *
     * On return no transaction is active. It is a programming error to call this inside of a
     * WriteUnitOfWork, even if the AbandonSnapshotMode is 'kCommit'.
     */
    void abandonSnapshot();

    /**
     * Stores 'mode' as the current AbandonSnapshotMode, i.e. the mode that the documentation of
     * abandonSnapshot() refers to.
     */
    void setAbandonSnapshotMode(AbandonSnapshotMode mode) {
        _abandonSnapshotMode = mode;
    }
    /**
     * Returns the AbandonSnapshotMode most recently stored by setAbandonSnapshotMode().
     */
    AbandonSnapshotMode abandonSnapshotMode() const {
        return _abandonSnapshotMode;
    }

    /**
     * Sets the OperationContext that currently owns this RecoveryUnit. Should only be called by the
     * OperationContext.
     */
    void setOperationContext(OperationContext* opCtx);

    /**
     * Informs the RecoveryUnit that a snapshot will be needed soon, if one was not already
     * established. This specifically allows the storage engine to preallocate any required
     * transaction resources while minimizing the critical section between generating a new
     * timestamp and setting it using setTimestamp.
     *
     * The default implementation is a no-op.
     */
    virtual void preallocateSnapshot() {}

    /**
     * Like preallocateSnapshot() above but also indicates that the snapshot will be used for
     * reading the oplog.
     *
     * StorageEngines may not implement this in which case it works like preallocateSnapshot: the
     * default implementation simply calls preallocateSnapshot().
     */
    virtual void preallocateSnapshotForOplogRead() {
        preallocateSnapshot();
    }

    /**
     * Returns whether or not a majority committed snapshot is available. If no snapshot has yet
     * been marked as Majority Committed, returns a status with error code
     * ReadConcernMajorityNotAvailableYet. After this returns successfully, at any point where
     * implementations attempt to acquire committed snapshot, if there are none available due to a
     * call to SnapshotManager::clearCommittedSnapshot(), an AssertionException with the same code
     * should be thrown.
     *
     * StorageEngines that don't support a SnapshotManager should use the default
     * implementation, which returns a Status carrying ErrorCodes::CommandNotSupported.
     */
    virtual Status majorityCommittedSnapshotAvailable() const {
        return {ErrorCodes::CommandNotSupported,
                "Current storage engine does not support majority readConcerns"};
    }

    /**
     * Returns the Timestamp being used by this recovery unit or boost::none if not reading from
     * a point in time. Any point in time returned will reflect one of the following:
     *  - when using ReadSource::kProvided, the timestamp provided.
     *  - when using ReadSource::kNoOverlap, the timestamp chosen by the storage engine.
     *  - when using ReadSource::kAllDurableSnapshot, the timestamp chosen using the storage
     * engine's all_durable timestamp.
     *  - when using ReadSource::kLastApplied, the last applied timestamp. Can return boost::none
     * if no timestamp has been established.
     *  - when using ReadSource::kMajorityCommitted, the majority committed timestamp chosen by the
     * storage engine after a transaction has been opened.
     *
     * This may passively start a storage engine transaction to establish a read timestamp.
     *
     * The default implementation ignores 'opCtx' and returns boost::none.
     */
    virtual boost::optional<Timestamp> getPointInTimeReadTimestamp(OperationContext* opCtx) {
        return boost::none;
    }

    /**
     * Gets the local SnapshotId, constructed from the _mySnapshotId member.
     *
     * It is only valid to compare SnapshotIds generated by a single RecoveryUnit.
     *
     * This is unrelated to Timestamp which must be globally comparable.
     */
    SnapshotId getSnapshotId() const {
        return SnapshotId{_mySnapshotId};
    }

    /**
     * Sets a timestamp to assign to future writes in a transaction.
     * All subsequent writes will be assigned this timestamp.
     * If setTimestamp() is called again, specifying a new timestamp, future writes will use this
     * new timestamp but past writes remain with their originally assigned timestamps.
     * Writes that occur before any setTimestamp() is called will be assigned the timestamp
     * specified in the last setTimestamp() call in the transaction, at commit time.
     *
     * setTimestamp() will fail if a commit timestamp is set using setCommitTimestamp() and not
     * yet cleared with clearCommitTimestamp(). setTimestamp() will also fail if a prepareTimestamp
     * has been set.
     *
     * The default implementation ignores 'timestamp' and returns Status::OK().
     */
    virtual Status setTimestamp(Timestamp timestamp) {
        return Status::OK();
    }

    /**
     * Returns true if a commit timestamp has been assigned to writes in this transaction.
     * Otherwise, returns false.
     *
     * The default implementation always returns false.
     */
    virtual bool isTimestamped() const {
        return false;
    }

    /**
     * Sets a timestamp that will be assigned to all future writes on this RecoveryUnit until
     * clearCommitTimestamp() is called. This must be called either outside of a WUOW or on a
     * prepared transaction after setPrepareTimestamp() is called. setTimestamp() must not be called
     * while a commit timestamp is set.
     *
     * The default implementation is a no-op: 'timestamp' is ignored.
     */
    virtual void setCommitTimestamp(Timestamp timestamp) {}

    /**
     * Sets a timestamp that decides when all the future writes on this RecoveryUnit will be
     * durable.
     *
     * The default implementation is a no-op: 'timestamp' is ignored.
     */
    virtual void setDurableTimestamp(Timestamp timestamp) {}

    /**
     * Clears the commit timestamp that was set by setCommitTimestamp(). This must be called outside
     * of a WUOW. This must be called when a commit timestamp is set.
     *
     * The default implementation is a no-op.
     */
    virtual void clearCommitTimestamp() {}

    /**
     * Returns the commit timestamp. Can be called at any time.
     *
     * The default implementation returns a value-initialized Timestamp.
     */
    virtual Timestamp getCommitTimestamp() const {
        return {};
    }

    /**
     * Returns the durable timestamp.
     *
     * The default implementation returns a value-initialized Timestamp.
     */
    virtual Timestamp getDurableTimestamp() const {
        return {};
    }

    /**
     * Sets a prepare timestamp for the current transaction. A subsequent call to
     * prepareUnitOfWork() is expected and required.
     * This cannot be called after setTimestamp or setCommitTimestamp.
     * This must be called inside a WUOW and may only be called once.
     *
     * The default implementation ignores 'timestamp' and calls uasserted() with
     * ErrorCodes::CommandNotSupported.
     */
    virtual void setPrepareTimestamp(Timestamp timestamp) {
        uasserted(ErrorCodes::CommandNotSupported,
                  "This storage engine does not support prepared transactions");
    }

    /**
     * Returns the prepare timestamp for the current transaction.
     * Must be called after setPrepareTimestamp(), and cannot be called after setTimestamp() or
     * setCommitTimestamp(). This must be called inside a WUOW.
     *
     * The default implementation calls uasserted() with ErrorCodes::CommandNotSupported.
     */
    virtual Timestamp getPrepareTimestamp() const {
        // No return statement follows: this relies on uasserted() never returning to the caller.
        uasserted(ErrorCodes::CommandNotSupported,
                  "This storage engine does not support prepared transactions");
    }

    /**
     * Sets catalog conflicting timestamp.
     * This cannot be called after WiredTigerRecoveryUnit::_txnOpen.
     *
     * This value must be set when both of the following conditions are true:
     * - A storage engine snapshot is opened without a read timestamp
     * (RecoveryUnit::ReadSource::kNoTimestamp).
     * - The transaction may touch collections it does not yet have locks for.
     * In this circumstance, the catalog conflicting timestamp serves as a substitute for a read
     * timestamp. This value must be set to a valid (i.e: no-holes) read timestamp prior to
     * acquiring a storage engine snapshot. This timestamp will be used to determine if any changes
     * had happened to the in-memory catalog after a storage engine snapshot got opened for that
     * transaction.
     *
     * The default implementation is a no-op: 'timestamp' is ignored.
     */
    virtual void setCatalogConflictingTimestamp(Timestamp timestamp) {}

    /**
     * Returns the catalog conflicting timestamp.
     *
     * The default implementation returns a value-initialized Timestamp.
     */
    virtual Timestamp getCatalogConflictingTimestamp() const {
        return {};
    }

    /**
     * MongoDB must update documents with non-decreasing timestamp values. A storage engine is
     * allowed to assert when this contract is violated. An untimestamped write is a subset of these
     * violations, which may be necessary in limited circumstances. This API can be called before a
     * transaction begins to suppress this subset of errors.
     *
     * The default implementation is a no-op.
     */
    virtual void allowUntimestampedWrite() {}

    /**
     * Fetches the storage level statistics.
     *
     * The default implementation has none to report and returns a null shared_ptr.
     */
    virtual std::shared_ptr<StorageStats> getOperationStatistics() const {
        return nullptr;
    }

    /**
     * Identifies which external or provided timestamp future transactions read from.
     */
    enum ReadSource {
        /** Read without a timestamp. This is the default. */
        kNoTimestamp,

        /** Read from the majority all-committed timestamp. */
        kMajorityCommitted,

        /** Read from the latest timestamp where no future transactions will commit at or before. */
        kNoOverlap,

        /** Read from the lastApplied timestamp. */
        kLastApplied,

        /**
         * Read from the all_durable timestamp. New transactions will always read from the same
         * timestamp and never advance.
         */
        kAllDurableSnapshot,

        /** Read from the timestamp provided to setTimestampReadSource. */
        kProvided,

        /** Read from the latest checkpoint. */
        kCheckpoint,
    };

    /**
     * Returns the name of the enumerator 'rs' as a string, e.g. "kNoTimestamp" for
     * ReadSource::kNoTimestamp.
     *
     * A value that matches none of the cases below falls out of the switch and reaches
     * MONGO_UNREACHABLE.
     */
    static std::string toString(ReadSource rs) {
        // One case per ReadSource enumerator; there is no default label.
        switch (rs) {
            case ReadSource::kNoTimestamp:
                return "kNoTimestamp";
            case ReadSource::kMajorityCommitted:
                return "kMajorityCommitted";
            case ReadSource::kNoOverlap:
                return "kNoOverlap";
            case ReadSource::kLastApplied:
                return "kLastApplied";
            case ReadSource::kAllDurableSnapshot:
                return "kAllDurableSnapshot";
            case ReadSource::kProvided:
                return "kProvided";
            case ReadSource::kCheckpoint:
                return "kCheckpoint";
        }
        MONGO_UNREACHABLE;
    }

    /**
     * Sets which timestamp to use for read transactions. If 'provided' is supplied, only kProvided
     * is an acceptable input.
     *
     * Must be called in one of the following cases:
     * - a transaction is not active
     * - no read source has been set yet
     * - the read source provided is the same as the existing read source
     *
     * The default implementation is a no-op: both 'source' and 'provided' are ignored.
     */
    virtual void setTimestampReadSource(ReadSource source,
                                        boost::optional<Timestamp> provided = boost::none) {}

    /**
     * Returns the ReadSource used for read transactions.
     *
     * The default implementation always reports ReadSource::kNoTimestamp.
     */
    virtual ReadSource getTimestampReadSource() const {
        return ReadSource::kNoTimestamp;
    }

    /**
     * NOTE(review): undocumented in this header. Judging by the name, presumably the timestamp up
     * to which the oplog is visible to this RecoveryUnit - confirm against the overriding
     * storage engine implementation.
     *
     * The default implementation returns boost::none.
     */
    virtual boost::optional<int64_t> getOplogVisibilityTs() {
        return boost::none;
    }

    /**
     * Pinning informs callers not to change the ReadSource on this RecoveryUnit. Callers are
     * expected to first check isReadSourcePinned before attempting to change the ReadSource. An
     * error may occur otherwise.
     * See also `PinReadSourceBlock` for a RAII-style solution.
     *
     * The default implementation is a no-op.
     */
    virtual void pinReadSource() {}
    // Presumably releases the pin taken by pinReadSource() (inferred from the name - confirm in
    // the overriding classes). The default implementation is a no-op.
    virtual void unpinReadSource() {}
    // Callers are expected to consult this before changing the ReadSource. The default
    // implementation always returns false.
    virtual bool isReadSourcePinned() const {
        return false;
    }

    /**
     * Declares whether this operation intends to perform reads whose data does not need to stay in
     * the storage engine cache. Useful for operations that do large, one-time scans of data; the
     * storage engine will then attempt to keep higher-priority data from being evicted from the
     * cache. This may not be called in an active transaction.
     *
     * The default implementation is a no-op: 'readOnce' is ignored.
     */
    virtual void setReadOnce(bool readOnce) {}

    /**
     * Returns the read-once setting. The default implementation always returns false.
     */
    virtual bool getReadOnce() const {
        return false;
    }

    /**
     * Reports whether the RecoveryUnit has an open snapshot. A snapshot can be opened inside or
     * outside of a WriteUnitOfWork.
     *
     * The default implementation forwards to _isActive().
     */
    virtual bool isActive() const {
        return _isActive();
    }

    /**
     * Makes the WriteUnitOfWork ignore the multi timestamp constraint for the remainder of the
     * WriteUnitOfWork. That constraint requires that, if multiple timestamps are set within a
     * WriteUnitOfWork, the first timestamp must be set prior to any writes.
     *
     * Must be reset when the WriteUnitOfWork is either committed or rolled back.
     *
     * The default implementation is a no-op.
     */
    virtual void ignoreAllMultiTimestampConstraints() {}

    /**
     * Registers a callback to be called prior to a WriteUnitOfWork committing the storage
     * transaction. This callback may throw a WriteConflictException which will abort the
     * transaction.
     */
    virtual void registerPreCommitHook(std::function<void(OperationContext*)> callback);

    // NOTE(review): only declared in this header. Presumably invokes, with 'opCtx', the callbacks
    // registered through registerPreCommitHook() - confirm against the out-of-line definition.
    virtual void runPreCommitHooks(OperationContext* opCtx);

    /**
     * A Change is an action handed to registerChange() while a WriteUnitOfWork exists. When the
     * WriteUnitOfWork goes out of scope, the Change is either commit()'d or rollback()'d. Each
     * registered change will be committed or rolled back once.
     *
     * Rules for implementors:
     *  - Neither rollback() nor commit() may fail or throw exceptions.
     *  - Acquiring locks or blocking operations should not be performed in these handlers, as it
     *    may lead to deadlocks. LockManager locks are still held due to 2PL.
     *  - Implementors are responsible for handling their own synchronization, and must be aware
     *    that rollback() and commit() may be called out of line and after the WriteUnitOfWork
     *    has been freed. Pointers or references to stack variables should not be bound to the
     *    definitions of rollback() or commit().
     *
     * commit() handlers are passed the timestamp at which the transaction is committed. If the
     * transaction is not committed at a particular timestamp, or if the storage engine does not
     * support timestamps, boost::none is supplied instead.
     *
     * The OperationContext handed to commit() and rollback() is the current OperationContext,
     * which may not be the same as when the Change was registered on the RecoveryUnit. The usage
     * restrictions listed above apply.
     */
    class Change {
    public:
        virtual ~Change() = default;

        virtual void rollback(OperationContext* opCtx) = 0;
        virtual void commit(OperationContext* opCtx, boost::optional<Timestamp> commitTime) = 0;
    };

    /**
     * A SnapshotChange is an action that can be registered at any time. When a WriteUnitOfWork
     * begins, openSnapshot() is called on every registered snapshot change. closeSnapshot() is
     * called when the snapshot is abandoned, or when the WriteUnitOfWork is committed or aborted.
     *
     * Implementations are bound by the same rules that apply to the Change class.
     */
    class SnapshotChange {
    public:
        virtual ~SnapshotChange() = default;

        virtual void openSnapshot(OperationContext* opCtx) = 0;
        virtual void closeSnapshot(OperationContext* opCtx) = 0;
    };

    /**
     * The commitUnitOfWork() method calls the commit() method of each registered change in order of
     * registration. The abortUnitOfWork() method calls the rollback() method of each registered
     * Change in reverse order of registration. Either will unregister and delete the changes.
     *
     * The registerChange() method may only be called when a WriteUnitOfWork is active, and
     * may not be called during commit or rollback.
     */
    void registerChange(std::unique_ptr<Change> change);

    /**
     * Registers a change with the given rollback and commit functions.
     *
     * Be careful about the lifetimes of all variables captured by the callback!
     */
    template <typename RollbackCallback, typename CommitCallback>
    void registerChange(CommitCallback commit, RollbackCallback rollback) {
        class CallbackChange final : public Change {
        public:
            CallbackChange(CommitCallback&& commit, RollbackCallback&& rollback)
                : _rollback(std::move(rollback)), _commit(std::move(commit)) {}
            void rollback(OperationContext* opCtx) final {
                _rollback();
            }
            void commit(OperationContext* opCtx, boost::optional<Timestamp> ts) final {
                _commit(ts);
            }

        private:
            RollbackCallback _rollback;
            CommitCallback _commit;
        };

        registerChange(std::make_unique<CallbackChange>(std::move(commit), std::move(rollback)));
    }

    /**
     * Like registerChange() above but should only be used to make new state visible in the
     * in-memory catalog. Only one change of this kind may be registered at a given time to ensure
     * catalog updates are atomic. Change registered with this function will commit after the commit
     * changes registered with registerChange and rollback will run before the rollback changes
     * registered with registerChange.
     *
     * This separation ensures that regular Changes that can modify state are run before the Change
     * to install the new state in the in-memory catalog, after which there should be no further
     * changes.
     */
    void registerChangeForCatalogVisibility(std::unique_ptr<Change> change);

    /**
     * Returns true if a change has been registered with registerChangeForCatalogVisibility() above.
     */
    bool hasRegisteredChangeForCatalogVisibility();

    /**
     * Registers a callback to be called if the current WriteUnitOfWork rolls back.
     *
     * 'callback' is invoked without arguments from Change::rollback(); nothing happens on commit.
     *
     * Be careful about the lifetimes of all variables captured by the callback!
     */
    template <typename Callback>
    void onRollback(Callback callback) {
        // Adapter Change that owns the callable and only reacts to rollback().
        class OnRollbackChange final : public Change {
        public:
            // explicit: a bare callable must never convert to this adapter implicitly.
            explicit OnRollbackChange(Callback&& callback) : _callback(std::move(callback)) {}

            void rollback(OperationContext*) final {
                _callback();
            }

            // Intentionally empty: this change has no commit-side effect.
            void commit(OperationContext*, boost::optional<Timestamp>) final {}

        private:
            Callback _callback;
        };

        registerChange(std::make_unique<OnRollbackChange>(std::move(callback)));
    }

    /**
     * Registers a callback to be called if the current WriteUnitOfWork commits.
     *
     * 'callback' is invoked from Change::commit() with the boost::optional<Timestamp> commit time
     * it receives; nothing happens on rollback.
     *
     * Be careful about the lifetimes of all variables captured by the callback!
     */
    template <typename Callback>
    void onCommit(Callback callback) {
        // Adapter Change that owns the callable and only reacts to commit().
        class OnCommitChange final : public Change {
        public:
            // explicit: a bare callable must never convert to this adapter implicitly.
            explicit OnCommitChange(Callback&& callback) : _callback(std::move(callback)) {}

            // Intentionally empty: this change has no rollback-side effect.
            void rollback(OperationContext*) final {}

            void commit(OperationContext*, boost::optional<Timestamp> commitTime) final {
                _callback(commitTime);
            }

        private:
            Callback _callback;
        };

        registerChange(std::make_unique<OnCommitChange>(std::move(callback)));
    }

    /**
     * Registers a callback to be called when the snapshot is opened.
     *
     * 'cb' is invoked from SnapshotChange::openSnapshot() with the OperationContext it receives;
     * nothing happens when the snapshot is closed.
     *
     * Be careful about the lifetimes of all variables captured by the callback!
     */
    template <typename Callback>
    void onOpenSnapshot(Callback cb) {
        // Adapter SnapshotChange that owns the callable and only reacts to openSnapshot().
        class OnOpenSnapshotChange final : public SnapshotChange {
        public:
            // explicit: a bare callable must never convert to this adapter implicitly.
            explicit OnOpenSnapshotChange(Callback&& callback) : _callback(std::move(callback)) {}

            void openSnapshot(OperationContext* opCtx) final {
                _callback(opCtx);
            }

            // Intentionally empty: this change has no close-side effect.
            void closeSnapshot(OperationContext*) final {}

        private:
            Callback _callback;
        };

        _registerSnapshotChange(std::make_unique<OnOpenSnapshotChange>(std::move(cb)));
    }

    /**
     * Registers a callback to be called when the snapshot is closed.
     *
     * Be careful about the lifetimes of all variables captured by the callback!
     */
    template <typename Callback>
    void onCloseSnapshot(Callback cb) {
        // Local adapter that exposes the caller's callable through the SnapshotChange
        // interface. Only the close-snapshot notification does any work.
        class OnCloseSnapshotChange final : public SnapshotChange {
        public:
            explicit OnCloseSnapshotChange(Callback&& fn) : _fn(std::move(fn)) {}

            // Forwards the operation context to the stored callable.
            void closeSnapshot(OperationContext* opCtx) final {
                _fn(opCtx);
            }

            // Opening a snapshot is of no interest to this handler.
            void openSnapshot(OperationContext*) final {}

        private:
            Callback _fn;
        };

        auto snapshotChange = std::make_unique<OnCloseSnapshotChange>(std::move(cb));
        _registerSnapshotChange(std::move(snapshotChange));
    }

    /**
     * Passes the 'orderedCommit' flag to the concrete recovery unit. This is a pure virtual, so
     * every implementation decides how to honor it.
     *
     * NOTE(review): what "ordered commit" means is not described in this part of the header --
     * confirm against the storage engine implementations.
     */
    virtual void setOrderedCommit(bool orderedCommit) = 0;

    /**
     * State transitions:
     *
     *   /------------------------> Inactive <-----------------------------\
     *   |                             |                                   |
     *   |                             |                                   |
     *   |              /--------------+--------------\                    |
     *   |              |                             |                    | abandonSnapshot()
     *   |              |                             |                    |
     *   |   beginUOW() |                             | _txnOpen()         |
     *   |              |                             |                    |
     *   |              V                             V                    |
     *   |    InactiveInUnitOfWork          ActiveNotInUnitOfWork ---------/
     *   |              |                             |
     *   |              |                             |
     *   |   _txnOpen() |                             | beginUOW()
     *   |              |                             |
     *   |              \--------------+--------------/
     *   |                             |
     *   |                             |
     *   |                             V
     *   |                           Active
     *   |                             |
     *   |                             |
     *   |              /--------------+--------------\
     *   |              |                             |
     *   |              |                             |
     *   |   abortUOW() |                             | commitUOW()
     *   |              |                             |
     *   |              V                             V
     *   |          Aborting                      Committing
     *   |              |                             |
     *   |              |                             |
     *   |              |                             |
     *   \--------------+-----------------------------/
     *
     */
    enum class State {
        // Initial state of the '_state' member; also where abandonSnapshot(), Aborting and
        // Committing lead back to in the diagram above.
        kInactive,
        // Reached from Inactive through beginUOW(): counted by _inUnitOfWork() but not by
        // _isActive().
        kInactiveInUnitOfWork,
        // Reached from Inactive through _txnOpen(): counted by _isActive() but not by
        // _inUnitOfWork().
        kActiveNotInUnitOfWork,
        // Reached once both beginUOW() and _txnOpen() have happened, in either order.
        kActive,
        // Entered from Active through abortUOW().
        kAborting,
        // Entered from Active through commitUOW().
        kCommitting,
    };

    static std::string toString(State state) {
        // Pick the label first and build the string once at the end. The cases follow the
        // declaration order of the enum.
        const char* label = nullptr;
        switch (state) {
            case State::kInactive:
                label = "Inactive";
                break;
            case State::kInactiveInUnitOfWork:
                label = "InactiveInUnitOfWork";
                break;
            case State::kActiveNotInUnitOfWork:
                label = "ActiveNotInUnitOfWork";
                break;
            case State::kActive:
                label = "Active";
                break;
            case State::kAborting:
                label = "Aborting";
                break;
            case State::kCommitting:
                label = "Committing";
                break;
        }
        if (!label) {
            // Only reachable when 'state' holds a value that is not one of the enumerators.
            MONGO_UNREACHABLE;
        }
        return label;
    }

    /**
     * Exposed for debugging purposes.
     */
    State getState() const {
        // Public, read-only view of the current state. Marked const because it only forwards to
        // the const accessor _getState(), which lets callers holding a const RecoveryUnit use it.
        return _getState();
    }

    /**
     * Raises the no-eviction-after-rollback flag on this recovery unit. The flag is only ever set
     * to true here; this method offers no way to lower it again.
     *
     * NOTE(review): who consumes the flag, and what "eviction" refers to, is not visible in this
     * part of the header -- confirm against the storage engine code.
     */
    void setNoEvictionAfterRollback() {
        _noEvictionAfterRollback = true;
    }

    /**
     * Returns the current value of the no-eviction-after-rollback flag.
     */
    bool getNoEvictionAfterRollback() const {
        return _noEvictionAfterRollback;
    }

    /**
     * Returns true if this is an instance of RecoveryUnitNoop.
     */
    virtual bool isNoop() const {
        // Base-class answer. Presumably RecoveryUnitNoop overrides this to return true --
        // confirm in that class.
        return false;
    }

protected:
    /**
     * Default constructor. It sits in the protected section, which restricts construction to
     * derived classes (and friends). Its definition is not part of this header section.
     */
    RecoveryUnit();

    /**
     * Returns the current state.
     */
    State _getState() const {
        // Plain read of the state member; no transition is performed.
        return _state;
    }

    /**
     * Transitions to new state.
     *
     * Invokes openSnapshot() for all registered snapshot changes when transitioning to kActive or
     * kActiveNotInUnitOfWork from an inactive state.
     */
    void _setState(State newState);

    /**
     * Returns true if active.
     */
    bool _isActive() const {
        // Either of the two "Active" states counts, whether or not a unit of work is in progress.
        switch (_state) {
            case State::kActive:
            case State::kActiveNotInUnitOfWork:
                return true;
            default:
                return false;
        }
    }

    /**
     * Returns true if currently managed by a WriteUnitOfWork.
     */
    bool _inUnitOfWork() const {
        // A unit of work is in progress in exactly two states: the one reached by beginUOW()
        // alone, and the fully active one.
        const bool begunOnly = (_state == State::kInactiveInUnitOfWork);
        const bool fullyActive = (_state == State::kActive);
        return begunOnly || fullyActive;
    }

    /**
     * Returns true if currently running commit or rollback handlers.
     */
    bool _isCommittingOrAborting() const {
        // True only in the two transient states entered from Active.
        if (_state == State::kCommitting) {
            return true;
        }
        return _state == State::kAborting;
    }

    /**
     * Executes all registered commit handlers and clears all registered changes.
     */
    void _executeCommitHandlers(boost::optional<Timestamp> commitTimestamp);

    /**
     * Executes all registered rollback handlers and clears all registered changes.
     */
    void _executeRollbackHandlers();

    /**
     * Executes all registered open snapshot handlers. This does not clear any registered snapshot
     * changes in order to keep the close snapshot handlers around until the snapshot is closed.
     */
    void _executeOpenSnapshotHandlers();

    /**
     * Executes all registered close snapshot handlers and clears all registered snapshot changes.
     */
    void _executeCloseSnapshotHandlers();

    // Set to true by setNoEvictionAfterRollback() and reported by getNoEvictionAfterRollback().
    // Being protected, it is also directly accessible to derived classes.
    bool _noEvictionAfterRollback = false;

    // Starts out as AbandonSnapshotMode::kAbort.
    // NOTE(review): the enum and whatever changes this value are declared outside this part of
    // the header -- confirm there what the modes mean.
    AbandonSnapshotMode _abandonSnapshotMode = AbandonSnapshotMode::kAbort;

private:
    // Sets the snapshot associated with this RecoveryUnit to a new globally unique id number.
    void assignNextSnapshotId();

    // Hooks that every concrete RecoveryUnit must implement (all four are pure virtual).
    // NOTE(review): presumably each is invoked by the public entry point of the matching name
    // (beginUnitOfWork(), abandonSnapshot(), commitUnitOfWork(), abortUnitOfWork()) -- confirm
    // in the part of the class outside this view.
    virtual void doBeginUnitOfWork() = 0;
    virtual void doAbandonSnapshot() = 0;
    virtual void doCommitUnitOfWork() = 0;
    virtual void doAbortUnitOfWork() = 0;

    // Overridable check; unlike the hooks above it is not pure, so a base implementation is
    // defined outside this header section.
    // NOTE(review): presumably verifies that a unit of work is in progress -- confirm.
    virtual void validateInUnitOfWork() const;

    /**
     * The beginUnitOfWork() method calls the openSnapshot() method. The abandonSnapshot(),
     * commitUnitOfWork(), and abortUnitOfWork() methods call the closeSnapshot() method. All
     * registered snapshot changes are called in the order of registration when a snapshot is opened
     * and in reverse order when a snapshot is closed.
     *
     * This method may be called outside or inside of a WriteUnitOfWork.
     */
    void _registerSnapshotChange(std::unique_ptr<SnapshotChange> snapshotChange) {
        // Take ownership and append, so that registration order is preserved in the vector.
        _snapshotChanges.emplace_back(std::move(snapshotChange));
    }

    std::vector<std::function<void(OperationContext*)>> _preCommitHooks;

    typedef std::vector<std::unique_ptr<Change>> Changes;
    Changes _changes;
    typedef std::vector<std::unique_ptr<SnapshotChange>> SnapshotChanges;
    SnapshotChanges _snapshotChanges;
    std::unique_ptr<Change> _changeForCatalogVisibility;
    State _state = State::kInactive;
    OperationContext* _opCtx = nullptr;
    uint64_t _mySnapshotId;
    bool _readOnly = false;
};

/**
 * RAII-style class to manage pinning and unpinning the readSource.
 */
class PinReadSourceBlock {
    PinReadSourceBlock(const PinReadSourceBlock&) = delete;
    PinReadSourceBlock& operator=(const PinReadSourceBlock&) = delete;

public:
    explicit PinReadSourceBlock(RecoveryUnit* recoveryUnit) : _recoveryUnit(recoveryUnit) {
        _recoveryUnit->pinReadSource();
    }

    ~PinReadSourceBlock() {
        _recoveryUnit->unpinReadSource();
    }

private:
    RecoveryUnit* const _recoveryUnit;
};

}  // namespace mongo