summaryrefslogtreecommitdiff
path: root/src/mongo/util/fail_point.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/util/fail_point.h')
-rw-r--r--src/mongo/util/fail_point.h266
1 files changed, 205 insertions, 61 deletions
diff --git a/src/mongo/util/fail_point.h b/src/mongo/util/fail_point.h
index daf39bcba49..c5c12e62c23 100644
--- a/src/mongo/util/fail_point.h
+++ b/src/mongo/util/fail_point.h
@@ -30,48 +30,78 @@
#pragma once
#include <functional>
+#include <string>
+#include "mongo/base/status.h"
#include "mongo/base/status_with.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/operation_context.h"
#include "mongo/platform/atomic_word.h"
#include "mongo/platform/mutex.h"
+#include "mongo/stdx/unordered_map.h"
namespace mongo {
/**
- * A simple thread-safe fail point implementation that can be activated and
- * deactivated, as well as embed temporary data into it.
*
- * The fail point has a static instance, which is represented by a FailPoint
- * object, and dynamic instance, represented by FailPoint::Scoped handles.
+ * A FailPoint is a hook mechanism allowing testing behavior to occur at prearranged
+ * execution points in the server code. They can be activated and deactivated, and
+ * configured to hold data.
+ *
+ * A FailPoint is usually defined by the MONGO_FAIL_POINT_DEFINE(name) macro,
+ * which arranges for it to be added to the global failpoint registry.
*
* Sample use:
- * // Declared somewhere:
- * FailPoint makeBadThingsHappen;
*
- * // Somewhere in the code
- * return false || MONGO_unlikely(makeBadThingsHappen.shouldFail());
+ * // Defined somewhere:
+ * MONGO_FAIL_POINT_DEFINE(failPoint);
+ *
+ * bool somewhereInTheCode() {
+ * ... do some stuff ...
+ * // The failpoint artificially changes the return value of this function when active.
+ * if (MONGO_unlikely(failPoint.shouldFail()))
+ * return false;
+ * return true;
+ * }
*
- * or
+ * - or to implement more complex scenarios, use execute/executeIf -
*
- * // Somewhere in the code
- * makeBadThingsHappen.execute([&](const BSONObj& data) {
- * // Do something
- * });
+ * bool somewhereInTheCode() {
+ * failPoint.execute([&](const BSONObj& data) {
+ * // The bad things happen here, and can read the injected 'data'.
+ * });
+ * return true;
+ * }
*
- * // Another way to do it, where lambda isn't suitable, e.g. to cause an early return
- * // of the enclosing function.
- * if (auto sfp = makeBadThingsHappen.scoped(); MONGO_unlikely(sfp.isActive())) {
- * const BSONObj& data = sfp.getData();
- * // Do something, including break, continue, return, etc...
+ * // scoped() is another way to do it, where lambda isn't suitable, e.g. to cause
+ * // a return/continue/break to control the enclosing function.
+ * for (auto& user : users) {
+ * // The failpoint can be activated and given a user name, to skip that user.
+ * if (auto sfp = failPoint.scoped(); MONGO_unlikely(sfp.isActive())) {
+ * if (sfp.getData()["user"] == user.name()) {
+ * continue;
+ * }
+ * }
+ * processOneUser(user);
* }
*
- * Invariants:
+ * // Rendered compactly with scopedIf where the data serves as an activation filter.
+ * for (auto& user : users) {
+ * if (MONGO_unlikely(failPoint.scopedIf([&](auto&& o) {
+ * return o["user"] == user.name();
+ * }).isActive())) {
+ * continue;
+ * }
+ * processOneUser(user);
+ * }
+ *
+ * The `scopedIf` and `executeIf` members have an advantage over `scoped` and `execute`. They
+ * only affect the `FailPoint` activation counters (relevant to the `nTimes` and `skip` modes)
+ * if the predicate is true.
+ *
+ * A FailPoint can be configured remotely by a database command.
+ * See `src/mongo/db/commands/fail_point_cmd.cpp`.
*
- * 1. Always refer to _fpInfo first to check if failPoint is active or not before
- * entering fail point or modifying fail point.
- * 2. Client visible fail point states are read-only when active.
*/
class FailPoint {
private:
@@ -88,11 +118,18 @@ public:
};
/**
- * Helper class for making sure that FailPoint#shouldFailCloseBlock is called when
- * FailPoint#shouldFailOpenBlock was called.
- *
+ * An object representing an active FailPoint's interaction with the code it is
+ * instrumenting. It holds reference to its associated FailPoint, ensuring
+ * that FailPoint's state doesn't change while a Scoped is attached to it.
+ * If `isActive()`, then `getData()` may be called to retrieve injected data.
* Users don't create these. They are only used within the execute and executeIf
* functions and returned by the scoped() and scopedIf() functions.
+ *
+ * Ex:
+ * if (auto scoped = failPoint.scoped(); scoped.isActive()) {
+ * const BSONObj& data = scoped.getData();
+ * // failPoint injects some behavior, informed by `data`.
+ * }
*/
class Scoped {
public:
@@ -103,7 +140,7 @@ public:
~Scoped() {
if (_holdsRef) {
- _failPoint->shouldFailCloseBlock();
+ _failPoint->_shouldFailCloseBlock();
}
}
@@ -125,7 +162,7 @@ public:
const BSONObj& getData() const {
// Assert when attempting to get data without holding a ref.
fassert(16445, _holdsRef);
- return _failPoint->getData();
+ return _failPoint->_getData();
}
private:
@@ -141,7 +178,18 @@ public:
static void setThreadPRNGSeed(int32_t seed);
/**
- * Parses the {Mode, ValType, BSONObj} from the BSON.
+ * Parses the {mode, val, extra} from the BSON.
+ * obj = {
+ * mode: modeElem // required
+ * data: extra // optional payload to inject into the FailPoint intercept site.
+ * }
+ * where `modeElem` is one of:
+ * "off"
+ * "alwaysOn"
+ * {"times" : val} // active for the next val calls
+ * {"skip" : val} // skip calls, activate on and after call number (val+1).
+ * {"activationProbability" : val} // val is in interval [0.0, 1.0]
+ * }
*/
static StatusWith<ModeOptions> parseBSON(const BSONObj& obj);
@@ -151,22 +199,25 @@ public:
FailPoint& operator=(const FailPoint&) = delete;
/**
- * Note: This is not side-effect free - it can change the state to OFF after calling.
- * Note: see `executeIf` for information on `pred`.
+ * Returns true if fail point is active.
+ *
+ * Calls to `shouldFail` can have side effects. For example they affect the counters
+ * kept to manage the `skip` or `nTimes` modes (See `setMode`).
*
- * Calls to shouldFail should be placed inside MONGO_unlikely for performance.
+ * See `executeIf` for information on `pred`.
*
- * @return true if fail point is active.
+ * Calls to `shouldFail` should be placed inside MONGO_unlikely for performance.
+ * if (MONGO_unlikely(failpoint.shouldFail())) ...
*/
template <typename Pred>
bool shouldFail(Pred&& pred) {
- RetCode ret = shouldFailOpenBlock(std::forward<Pred>(pred));
+ RetCode ret = _shouldFailOpenBlock(std::forward<Pred>(pred));
if (MONGO_likely(ret == fastOff)) {
return false;
}
- shouldFailCloseBlock();
+ _shouldFailCloseBlock();
return ret == slowOn;
}
@@ -175,22 +226,20 @@ public:
}
/**
- * Changes the settings of this fail point. This will turn off the fail point
- * and waits for all dynamic instances referencing this fail point to go away before
- * actually modifying the settings.
+ * Changes the settings of this fail point. This will turn off the FailPoint and
+ * wait for all references on this FailPoint to go away before modifying it.
*
- * @param mode the new mode for this fail point.
- * @param val the value that can have different usage depending on the mode:
+ * @param mode new mode
+ * @param val unsigned having different interpretations depending on the mode:
*
* - off, alwaysOn: ignored
* - random: static_cast<int32_t>(std::numeric_limits<int32_t>::max() * p), where
* where p is the probability that any given evaluation of the failpoint should
* activate.
* - nTimes: the number of times this fail point will be active when
- * #shouldFail or #shouldFailOpenBlock is called.
- * - skip: the number of times this failpoint will be inactive when
- * #shouldFail or #shouldFailOpenBlock is called. After this number is reached, the
- * failpoint will always be active.
+ * #shouldFail/#execute/#scoped are called.
+ * - skip: will become active and remain active after
+ * #shouldFail/#execute/#scoped are called this number of times.
*
* @param extra arbitrary BSON object that can be stored to this fail point
* that can be referenced afterwards with #getData. Defaults to an empty
@@ -208,18 +257,24 @@ public:
/**
* Create a Scoped from this FailPoint.
- * Use the Scoped object to access failpoint data.
+ * The returned Scoped object will be active if the failpoint is active.
+ * If it's active, the returned object can be used to access FailPoint data.
*/
Scoped scoped() {
return scopedIf(nullptr);
}
/**
- * Create a Scoped from this FailPoint, only active when `pred(payload)` is true.
- * See `executeIf`. Use the Scoped object to access failpoint data.
+ * Create a Scoped from this FailPoint.
+ * If `pred(payload)` is true, then the returned Scoped object is active and the
+ * FailPoint's activation count is altered (relevant to e.g. the `nTimes` mode). If the
+ * predicate is false, an inactive Scoped is returned and this FailPoint's mode is not
+ * modified at all.
+ * If it's active, the returned object can be used to access FailPoint data.
+ * The `pred` should be callable like a `bool pred(const BSONObj&)`.
*/
template <typename Pred>
Scoped scopedIf(Pred&& pred) {
- return Scoped(this, shouldFailOpenBlock(std::forward<Pred>(pred)));
+ return Scoped(this, _shouldFailOpenBlock(std::forward<Pred>(pred)));
}
template <typename F>
@@ -228,9 +283,11 @@ public:
}
/**
- * The predicate `pred` should behave like a `bool pred(const BSONObj& payload)`.
- * If `pred(payload)`, then `f(payload)` is executed. Otherwise, `f` is not
- * executed and this FailPoint's mode is not altered (e.g. `nTimes` isn't consumed).
+ * If `pred(payload)` is true, then `f(payload)` is executed and the FailPoint's
+ * activation count is altered (relevant to e.g. the `nTimes` mode). Otherwise, `f`
+ * is not executed and this FailPoint's mode is not altered (e.g. `nTimes` isn't
+ * consumed).
+ * The `pred` should be callable like a `bool pred(const BSONObj&)`.
*/
template <typename F, typename Pred>
void executeIf(F&& f, Pred&& pred) {
@@ -240,12 +297,21 @@ public:
}
}
+ /**
+ * Take 100msec pauses for as long as the FailPoint is active.
+ * This uses `shouldFail()` and therefore affects FailPoint counters.
+ */
void pauseWhileSet() {
while (MONGO_unlikely(shouldFail())) {
sleepmillis(100);
}
}
+ /**
+ * Like `pauseWhileSet`, but interruptible via the `opCtx->sleepFor` mechanism. See
+ * `mongo::Interruptible::sleepFor` (Interruptible is a base class of
+ * OperationContext).
+ */
void pauseWhileSet(OperationContext* opCtx) {
while (MONGO_unlikely(shouldFail())) {
opCtx->sleepFor(Milliseconds(100));
@@ -253,8 +319,8 @@ public:
}
private:
- void enable();
- void disable();
+ void _enable();
+ void _disable();
/**
* Checks whether fail point is active and increments the reference counter without
@@ -269,37 +335,37 @@ private:
* fastOff if its disabled and doesn't need to be closed
*/
template <typename Pred>
- RetCode shouldFailOpenBlock(Pred&& pred) {
+ RetCode _shouldFailOpenBlock(Pred&& pred) {
if (MONGO_likely((_fpInfo.loadRelaxed() & kActiveBit) == 0)) {
return fastOff;
}
- return slowShouldFailOpenBlock(std::forward<Pred>(pred));
+ return _slowShouldFailOpenBlock(std::forward<Pred>(pred));
}
- RetCode shouldFailOpenBlock() {
- return shouldFailOpenBlock(nullptr);
+ RetCode _shouldFailOpenBlock() {
+ return _shouldFailOpenBlock(nullptr);
}
/**
* Decrements the reference counter.
- * @see #shouldFailOpenBlock
+ * @see #_shouldFailOpenBlock
*/
- void shouldFailCloseBlock();
+ void _shouldFailCloseBlock();
/**
- * slow path for #shouldFailOpenBlock
+ * slow path for #_shouldFailOpenBlock
*
* If a callable is passed, and returns false, this will return userIgnored and avoid altering
* the mode in any way. The argument is the fail point payload.
*/
- RetCode slowShouldFailOpenBlock(std::function<bool(const BSONObj&)> cb) noexcept;
+ RetCode _slowShouldFailOpenBlock(std::function<bool(const BSONObj&)> cb) noexcept;
/**
* @return the stored BSONObj in this fail point. Note that this cannot be safely
* read if this fail point is off.
*/
- const BSONObj& getData() const;
+ const BSONObj& _getData() const;
static const ValType kActiveBit = 1 << 31;
@@ -317,4 +383,82 @@ private:
mutable Mutex _modMutex = MONGO_MAKE_LATCH("FailPoint::_modMutex");
};
+class FailPointRegistry {
+public:
+ FailPointRegistry();
+
+ /**
+ * Adds a new fail point to this registry. Duplicate names are not allowed.
+ *
+ * @return the status code under these circumstances:
+ * OK - if successful.
+ * 51006 - if the given name already exists in this registry.
+ * CannotMutateObject - if this registry is already frozen.
+ */
+ Status add(const std::string& name, FailPoint* failPoint);
+
+ /**
+ * @return a registered FailPoint, or nullptr if it was not registered.
+ */
+ FailPoint* find(const std::string& name) const;
+
+ /**
+ * Freezes this registry from being modified.
+ */
+ void freeze();
+
+ /**
+ * Creates a new FailPointServerParameter for each failpoint in the registry. This allows the
+ * failpoint to be set on the command line via --setParameter, but is only allowed when
+ * running with '--setParameter enableTestCommands=1'.
+ */
+ void registerAllFailPointsAsServerParameters();
+
+private:
+ bool _frozen;
+ stdx::unordered_map<std::string, FailPoint*> _fpMap;
+};
+
+/**
+ * A scope guard that enables a named FailPoint on construction and disables it on destruction.
+ */
+class FailPointEnableBlock {
+public:
+ explicit FailPointEnableBlock(std::string failPointName);
+ FailPointEnableBlock(std::string failPointName, BSONObj cmdObj);
+ ~FailPointEnableBlock();
+
+private:
+ std::string _failPointName;
+ FailPoint* _failPoint;
+};
+
+/**
+ * Set a fail point in the global registry to a given value via BSON
+ * @throw DBException corresponding to ErrorCodes::FailPointSetFailed if no failpoint
+ * called failPointName exists.
+ */
+void setGlobalFailPoint(const std::string& failPointName, const BSONObj& cmdObj);
+
+/**
+ * Registration object for FailPoint. Its static-initializer registers FailPoint `fp`
+ * into the `globalFailPointRegistry()` under the specified `name`.
+ */
+class FailPointRegisterer {
+public:
+ FailPointRegisterer(const std::string& name, FailPoint* fp);
+};
+
+FailPointRegistry& globalFailPointRegistry();
+
+/**
+ * Convenience macro for defining a fail point and registering it.
+ * Must be used at namespace scope, not at local (inside a function) or class scope.
+ * Never use in header files, only .cpp files.
+ */
+#define MONGO_FAIL_POINT_DEFINE(fp) \
+ ::mongo::FailPoint fp; \
+ ::mongo::FailPointRegisterer fp##failPointRegisterer(#fp, &fp);
+
+
} // namespace mongo