summaryrefslogtreecommitdiff
path: root/src/mongo/db/storage/recovery_unit.h
blob: e774193654415a41acad8535668f525c53021b4e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
/**
 *    Copyright (C) 2014 MongoDB Inc.
 *
 *    This program is free software: you can redistribute it and/or  modify
 *    it under the terms of the GNU Affero General Public License, version 3,
 *    as published by the Free Software Foundation.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU Affero General Public License for more details.
 *
 *    You should have received a copy of the GNU Affero General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the GNU Affero General Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include <cstdint>
#include <stdlib.h>
#include <string>

#include "mongo/base/disallow_copying.h"
#include "mongo/base/status.h"
#include "mongo/db/storage/snapshot.h"
#include "mongo/db/storage/snapshot_name.h"

namespace mongo {

class BSONObjBuilder;
class OperationContext;

/**
 * A RecoveryUnit is responsible for ensuring that data is persisted.
 * All on-disk information must be mutated through this interface.
 */
class RecoveryUnit {
    MONGO_DISALLOW_COPYING(RecoveryUnit);

public:
    virtual ~RecoveryUnit() {}

    /**
     * A storage engine may elect to append state information to the passed BSONObjBuilder.  This
     * function is not currently called by any MongoDB command, but may in the future.
     */
    virtual void reportState(BSONObjBuilder* b) const {}

    /**
     * These should be called through WriteUnitOfWork rather than directly.
     *
     * A call to 'beginUnitOfWork' marks the beginning of a unit of work. Each call to
     * 'beginUnitOfWork' must be matched with exactly one call to either 'commitUnitOfWork' or
     * 'abortUnitOfWork'. When 'abortUnitOfWork' is called, all changes made since the begin
     * of the unit of work will be rolled back.
     */
    virtual void beginUnitOfWork(OperationContext* opCtx) = 0;
    virtual void commitUnitOfWork() = 0;
    virtual void abortUnitOfWork() = 0;

    /**
     * Waits until all commits that happened before this call are durable. Returns true, unless the
     * storage engine cannot guarantee durability, which should never happen when isDurable()
     * returned true. This cannot be called from inside a unit of work, and should fail if it is.
     */
    virtual bool waitUntilDurable() = 0;

    /**
     * When this is called, if there is an open transaction, it is closed. On return no
     * transaction is active. This cannot be called inside of a WriteUnitOfWork, and should
     * fail if it is.
     */
    virtual void abandonSnapshot() = 0;

    /**
     * Informs this RecoveryUnit that all future reads through it should be from a snapshot
     * marked as Majority Committed. Snapshots should still be separately acquired and newer
     * committed snapshots should be used if available whenever implementations would normally
     * change snapshots.
     *
     * If no snapshot has yet been marked as Majority Committed, returns a status with error
     * code ReadConcernMajorityNotAvailableYet. After this returns successfully, at any point where
     * implementations attempt to acquire committed snapshot, if there are none available due to a
     * call to SnapshotManager::dropAllSnapshots(), a UserException with the same code should be
     * thrown.
     *
     * StorageEngines that don't support a SnapshotManager should use the default
     * implementation.
     */
    virtual Status setReadFromMajorityCommittedSnapshot() {
        return {ErrorCodes::CommandNotSupported,
                "Current storage engine does not support majority readConcerns"};
    }

    /**
     * Returns true if setReadFromMajorityCommittedSnapshot() has been called.
     */
    virtual bool isReadingFromMajorityCommittedSnapshot() const {
        return false;
    }

    /**
     * Returns the SnapshotName being used by this recovery unit or boost::none if not reading from
     * a majority committed snapshot.
     *
     * It is possible for reads to occur from later snapshots, but they may not occur from earlier
     * snapshots.
     */
    virtual boost::optional<SnapshotName> getMajorityCommittedSnapshot() const {
        dassert(!isReadingFromMajorityCommittedSnapshot());
        return {};
    }

    /**
     * Gets the local SnapshotId.
     *
     * It is only valid to compare SnapshotIds generated by a single RecoveryUnit.
     *
     * This is unrelated to SnapshotName which must be globally comparable.
     */
    virtual SnapshotId getSnapshotId() const = 0;

    /**
     * A Change is an action that is registerChange()'d while a WriteUnitOfWork exists. The
     * change is either rollback()'d or commit()'d when the WriteUnitOfWork goes out of scope.
     *
     * Neither rollback() nor commit() may fail or throw exceptions.
     *
     * Change implementors are responsible for handling their own locking, and must be aware
     * that rollback() and commit() may be called after resources with a shorter lifetime than
     * the WriteUnitOfWork have been freed. Each registered change will be committed or rolled
     * back once.
     */
    class Change {
    public:
        virtual ~Change() {}

        virtual void rollback() = 0;
        virtual void commit() = 0;
    };

    /**
     * The RecoveryUnit takes ownership of the change. The commitUnitOfWork() method calls the
     * commit() method of each registered change in order of registration. The endUnitOfWork()
     * method calls the rollback() method of each registered Change in reverse order of
     * registration. Either will unregister and delete the changes.
     *
     * The registerChange() method may only be called when a WriteUnitOfWork is active, and
     * may not be called during commit or rollback.
     */
    virtual void registerChange(Change* change) = 0;

    /**
     * Registers a callback to be called if the current WriteUnitOfWork rolls back.
     *
     * Be careful about the lifetimes of all variables captured by the callback!
     */
    template <typename Callback>
    void onRollback(Callback callback) {
        class OnRollbackChange final : public Change {
        public:
            OnRollbackChange(Callback&& callback) : _callback(std::move(callback)) {}
            void rollback() final {
                _callback();
            }
            void commit() final {}

        private:
            Callback _callback;
        };

        registerChange(new OnRollbackChange(std::move(callback)));
    }

    /**
     * Registers a callback to be called if the current WriteUnitOfWork commits.
     *
     * Be careful about the lifetimes of all variables captured by the callback!
     */
    template <typename Callback>
    void onCommit(Callback callback) {
        class OnCommitChange final : public Change {
        public:
            OnCommitChange(Callback&& callback) : _callback(std::move(callback)) {}
            void rollback() final {}
            void commit() final {
                _callback();
            }

        private:
            Callback _callback;
        };

        registerChange(new OnCommitChange(std::move(callback)));
    }

    //
    // The remaining methods probably belong on DurRecoveryUnit rather than on the interface.
    //

    /**
     * Declare that the data at [x, x + len) is being written.
     */
    virtual void* writingPtr(void* data, size_t len) = 0;

    //
    // Syntactic sugar
    //

    /**
     * Declare write intent for an int
     */
    inline int& writingInt(int& d) {
        return *writing(&d);
    }

    /**
     * A templated helper for writingPtr.
     */
    template <typename T>
    inline T* writing(T* x) {
        writingPtr(x, sizeof(T));
        return x;
    }

    /**
     * Sets a flag that declares this RecoveryUnit will skip rolling back writes, for the
     * duration of the current outermost WriteUnitOfWork.  This function can only be called
     * between a pair of unnested beginUnitOfWork() / endUnitOfWork() calls.
     * The flag is cleared when endUnitOfWork() is called.
     * While the flag is set, rollback will skip rolling back writes, but custom rollback
     * change functions are still called.  Clearly, this functionality should only be used when
     * writing to temporary collections that can be cleaned up externally.  For example,
     * foreground index builds write to a temporary collection; if something goes wrong that
     * normally requires a rollback, we can instead clean up the index by dropping the entire
     * index.
     * Setting the flag may permit increased performance.
     */
    virtual void setRollbackWritesDisabled() = 0;

protected:
    RecoveryUnit() {}
};

}  // namespace mongo