summaryrefslogtreecommitdiff
path: root/src/mongo/util/fail_point.h
blob: f82d85785a5a1585fb59ce4fcb26613a5c492ce0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
/**
 *    Copyright (C) 2018-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include <functional>

#include "mongo/base/status_with.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/operation_context.h"
#include "mongo/platform/atomic_word.h"
#include "mongo/stdx/mutex.h"

namespace mongo {

/**
 * A simple thread-safe fail point implementation that can be activated and
 * deactivated, as well as embed temporary data into it.
 *
 * The fail point has a static instance, which is represented by a FailPoint
 * object, and dynamic instances, which are all the threads in between
 * shouldFailOpenBlock and shouldFailCloseBlock.
 *
 * Sample use:
 * // Declared somewhere:
 * FailPoint makeBadThingsHappen;
 *
 * // Somewhere in the code
 * return false || MONGO_FAIL_POINT(makeBadThingsHappen);
 *
 * or
 *
 * // Somewhere in the code
 * MONGO_FAIL_POINT_BLOCK(makeBadThingsHappen, blockMakeBadThingsHappen) {
 *     const BSONObj& data = blockMakeBadThingsHappen.getData();
 *     // Do something
 * }
 *
 * Invariants:
 *
 * 1. Always refer to _fpInfo first to check if failPoint is active or not before
 *    entering fail point or modifying fail point.
 * 2. Client visible fail point states are read-only when active.
 */
class FailPoint {
    FailPoint(const FailPoint&) = delete;
    FailPoint& operator=(const FailPoint&) = delete;

public:
    using ValType = unsigned;

    /**
     * Activation policy of a fail point (see #setMode for how the accompanying value is used):
     *   off      - never active
     *   alwaysOn - active on every evaluation
     *   random   - active with a configured probability on each evaluation
     *   nTimes   - active for a configured number of evaluations
     *   skip     - inactive for a configured number of evaluations, then always active
     */
    enum Mode { off, alwaysOn, random, nTimes, skip };

    /**
     * Result of #shouldFailOpenBlock:
     *   fastOff     - disabled; no block was opened, so nothing needs to be closed
     *   slowOff     - disabled, but a block was opened and must be closed
     *   slowOn      - active; the block must be closed
     *   userIgnored - active and must be closed, but should not be acted on
     */
    enum RetCode { fastOff = 0, slowOff, slowOn, userIgnored };

    /**
     * Explicitly resets the seed used for the PRNG in this thread.  If not called on a thread,
     * an instance of SecureRandom is used to seed the PRNG.
     */
    static void setThreadPRNGSeed(int32_t seed);

    /**
     * Parses the FailPoint::Mode, FailPoint::ValType, and data BSONObj from the BSON.
     */
    static StatusWith<std::tuple<Mode, ValType, BSONObj>> parseBSON(const BSONObj& obj);

    FailPoint();

    /**
     * Note: This is not side-effect free - it can change the state to OFF after calling.
     * Note: see MONGO_FAIL_POINT_BLOCK_IF for information on the passed callable
     *
     * @return true if fail point is active.
     */
    template <typename Callable = std::nullptr_t>
    inline bool shouldFail(Callable&& cb = nullptr) {
        RetCode ret = shouldFailOpenBlock(std::forward<Callable>(cb));

        // fastOff is the only result that did not open a block, so it is the only one that
        // must not be followed by shouldFailCloseBlock.
        if (MONGO_likely(ret == fastOff)) {
            return false;
        }

        shouldFailCloseBlock();
        return ret == slowOn;
    }

    /**
     * Checks whether fail point is active and increments the reference counter without
     * decrementing it. Must call shouldFailCloseBlock afterwards when the return value
     * is not fastOff. Otherwise, this will remain read-only forever.
     *
     * Note: see MONGO_FAIL_POINT_BLOCK_IF for information on the passed callable
     *
     * @return slowOn if it's active and needs to be closed
     *         userIgnored if it's active and needs to be closed, but shouldn't be acted on
     *         slowOff if it's disabled and needs to be closed
     *         fastOff if it's disabled and doesn't need to be closed
     */
    template <typename Callable = std::nullptr_t>
    inline RetCode shouldFailOpenBlock(Callable&& cb = nullptr) {
        // Fast path: a single relaxed load of the active bit when the fail point is off.
        if (MONGO_likely((_fpInfo.loadRelaxed() & ACTIVE_BIT) == 0)) {
            return fastOff;
        }

        return slowShouldFailOpenBlock(std::forward<Callable>(cb));
    }

    /**
     * Decrements the reference counter.
     * @see #shouldFailOpenBlock
     */
    void shouldFailCloseBlock();

    /**
     * Changes the settings of this fail point. This will turn off the fail point
     * and wait for all dynamic instances referencing this fail point to go away before
     * actually modifying the settings.
     *
     * @param mode the new mode for this fail point.
     * @param val the value that can have different usage depending on the mode:
     *
     *     - off, alwaysOn: ignored
     *     - random: static_cast<int32_t>(std::numeric_limits<int32_t>::max() * p),
     *           where p is the probability that any given evaluation of the failpoint should
     *           activate.
     *     - nTimes: the number of times this fail point will be active when
     *         #shouldFail or #shouldFailOpenBlock is called.
     *     - skip: the number of times this failpoint will be inactive when
     *         #shouldFail or #shouldFailOpenBlock is called. After this number is reached, the
     *         failpoint will always be active.
     *
     * @param extra arbitrary BSON object that can be stored to this fail point
     *     that can be referenced afterwards with #getData. Defaults to an empty
     *     document.
     */
    void setMode(Mode mode, ValType val = 0, const BSONObj& extra = BSONObj());

    /**
     * @returns a BSON object showing the current mode and data stored.
     */
    BSONObj toBSON() const;

private:
    // The shift is done on an unsigned literal: `1 << 31` would shift a signed int into its
    // sign bit, which is undefined in C++11 and depends on an implementation-defined
    // conversion in later standards.
    static const ValType ACTIVE_BIT = 1u << 31;
    static const ValType REF_COUNTER_MASK = ~ACTIVE_BIT;

    // Bit layout:
    // 31: tells whether this fail point is active.
    // 0~30: unsigned ref counter for active dynamic instances.
    AtomicWord<ValType> _fpInfo{0};

    // Invariant: These should be read only if ACTIVE_BIT of _fpInfo is set.
    Mode _mode{off};
    AtomicWord<int> _timesOrPeriod{0};
    BSONObj _data;

    // protects _mode, _timesOrPeriod, _data
    mutable stdx::mutex _modMutex;

    /**
     * Enables this fail point.
     */
    void enableFailPoint();

    /**
     * Disables this fail point.
     */
    void disableFailPoint();

    /**
     * slow path for #shouldFailOpenBlock
     *
     * If a callable is passed, and returns false, this will return userIgnored and avoid altering
     * the mode in any way.  The argument is the fail point payload.
     */
    RetCode slowShouldFailOpenBlock(std::function<bool(const BSONObj&)> cb) noexcept;

    /**
     * @return the stored BSONObj in this fail point. Note that this cannot be safely
     *      read if this fail point is off.
     */
    const BSONObj& getData() const;

    friend class ScopedFailPoint;
};

/**
 * Scope guard that pairs a FailPoint#shouldFailOpenBlock call made at construction with the
 * FailPoint#shouldFailCloseBlock call it requires, issued at destruction. Meant to be created
 * only through the MONGO_FAIL_POINT_BLOCK macro.
 */
class ScopedFailPoint {
    ScopedFailPoint(const ScopedFailPoint&) = delete;
    ScopedFailPoint& operator=(const ScopedFailPoint&) = delete;

public:
    /**
     * Opens a block on 'failPoint', forwarding the optional callable 'cb' to
     * FailPoint#shouldFailOpenBlock, and remembers whether the block has to be closed and
     * whether the guarded body should run.
     */
    template <typename Callable = std::nullptr_t>
    ScopedFailPoint(FailPoint* failPoint, Callable&& cb = nullptr) : _failPoint(failPoint) {
        const FailPoint::RetCode openResult =
            _failPoint->shouldFailOpenBlock(std::forward<Callable>(cb));

        // Only slowOn means "act on the fail point"; every result except fastOff opened a block.
        _shouldRun = (openResult == FailPoint::slowOn);
        _shouldClose = (openResult != FailPoint::fastOff);
    }

    /**
     * Closes the block if the constructor opened one.
     */
    ~ScopedFailPoint() {
        if (!_shouldClose) {
            return;
        }

        _failPoint->shouldFailCloseBlock();
    }

    /**
     * @return true if fail point is on. This will be true at most once.
     */
    bool isActive() {
        // This is the condition of the for loop the macros expand to, so the flag is cleared on
        // every call to guarantee the loop body executes no more than one time.
        const bool runThisTime = _shouldRun;
        _shouldRun = false;
        return runThisTime;
    }

    /**
     * @return the data stored in the fail point. #isActive must be true
     *     before you can call this.
     */
    const BSONObj& getData() const {
        // Reading the data is only allowed while this object holds a reference on the fail point.
        fassert(16445, _shouldClose);
        return _failPoint->getData();
    }

private:
    FailPoint* _failPoint;
    bool _shouldRun;
    bool _shouldClose;
};

/**
 * Evaluates shouldFail() on the fail point 'symbol', marking the "true" outcome as unlikely.
 * 'symbol' is parenthesized so that any expression yielding a fail point (for example
 * '*failPointPtr') can be passed, not only a plain identifier.
 */
#define MONGO_FAIL_POINT(symbol) MONGO_unlikely((symbol).shouldFail())

/**
 * Keeps the calling thread here for as long as 'failPoint' evaluates as active, calling
 * sleepmillis(100) between evaluations. Every evaluation goes through FailPoint::shouldFail, so
 * it counts as a regular hit on the fail point.
 */
inline void MONGO_FAIL_POINT_PAUSE_WHILE_SET(FailPoint& failPoint) {
    for (;;) {
        if (!MONGO_FAIL_POINT(failPoint)) {
            return;
        }

        sleepmillis(100);
    }
}

/**
 * Keeps the calling thread here for as long as 'failPoint' evaluates as active, waiting through
 * opCtx->sleepFor(Milliseconds(100)) between evaluations.
 *
 * NOTE(review): the name implies sleepFor ends the wait when the operation is interrupted
 * (presumably by throwing) - confirm against OperationContext.
 */
inline void MONGO_FAIL_POINT_PAUSE_WHILE_SET_OR_INTERRUPTED(OperationContext* opCtx,
                                                            FailPoint& failPoint) {
    for (;;) {
        if (!MONGO_FAIL_POINT(failPoint)) {
            return;
        }

        opCtx->sleepFor(Milliseconds(100));
    }
}

/**
 * Macro for creating a fail point with block context. Also use this when
 * you want to access the data stored in the fail point.
 *
 * Expands to a for loop whose init-statement declares a mongo::ScopedFailPoint named
 * 'blockSymbol' bound to the address of 'symbol'; the statement following the macro is the loop
 * body and runs at most once, only when blockSymbol.isActive() returns true.
 *
 * 'symbol' is parenthesized before its address is taken so that an arbitrary lvalue expression
 * can be passed, not only a plain identifier.
 */
#define MONGO_FAIL_POINT_BLOCK(symbol, blockSymbol)      \
    for (mongo::ScopedFailPoint blockSymbol(&(symbol)); \
         MONGO_unlikely(blockSymbol.isActive());)

/**
 * Macro for creating a fail point with block context and a pre-flight condition. Also use this when
 * you want to access the data stored in the fail point.
 *
 * Your passed in callable should take a const BSONObj& (the fail point payload) and return bool.
 * If it returns true, you'll process the block as normal.  If you return false, you'll exit the
 * block without evaluating it and avoid altering the mode in any way (you won't consume nTimes for
 * instance).
 *
 * 'symbol' is parenthesized before its address is taken so that an arbitrary lvalue expression
 * can be passed, not only a plain identifier. The callable is forwarded through __VA_ARGS__, so
 * commas inside it do not need extra parentheses.
 */
#define MONGO_FAIL_POINT_BLOCK_IF(symbol, blockSymbol, ...)          \
    for (mongo::ScopedFailPoint blockSymbol(&(symbol), __VA_ARGS__); \
         MONGO_unlikely(blockSymbol.isActive());)

}  // namespace mongo