diff options
author | Jean-Sébastien Pédron <jean-sebastien.pedron@dumbbell.fr> | 2023-05-12 17:11:57 +0200 |
---|---|---|
committer | Jean-Sébastien Pédron <jean-sebastien.pedron@dumbbell.fr> | 2023-05-16 10:46:32 +0200 |
commit | 213eb20e1658a1cebb4c1a07606e28fc7b62ebca (patch) | |
tree | f7ae3a58697bb94120e5333e21e748b30a8be4cb /deps/rabbit/src/rabbit_feature_flags.erl | |
parent | c7d0427d621493f00a3aa0d6e7720f709f3769a6 (diff) | |
download | rabbitmq-server-git-fix-feature-flags-init-vs-code_server-deadlock-take2.tar.gz |
rabbit_feature_flags: Fix possible deadlock when calling the Code server (take 2)fix-feature-flags-init-vs-code_server-deadlock-take2
[Why]
The background reason for this fix is about the same as the one
explained in the previous version of this fix; see commit
e0a2f1027278bd0901bdaa9af6065abc676ff14f.
This time, the order of events that led to a similar deadlock is the
following:
0. No `rabbit_ff_registry` is loaded yet.
1. Process A, B and C call `rabbit_ff_registry:something()` indirectly
which triggers two initializations in parallel.
* Process A did it from an explicit call to
`rabbit_ff_registry_factory:initialize_factory()` during RabbitMQ
boot.
* Process B and C indirectly called it because they checked if a
feature flag was enabled.
2. Process B acquires the lock first and finishes the initialization. A
new registry is loaded and the old `rabbit_ff_registry` module copy
is marked as "old". At this point, process A and C still reference
that old copy because `rabbit_ff_registry:something()` is up above in
its call stack.
3. Process A acquires the lock, prepares the new registry and tries to
soft-purge the old `rabbit_ff_registry` copy before loading the new
one.
This is where the deadlock happens: process A requests the Code server
to purge the old copy, but the Code server waits for process C to stop
using it.
The difference between the steps described in the first bug fix
attempt's commit and these ones is that the process which lingers on the
deleted `rabbit_ff_registry` (process C above) isn't the one who
acquired the lock; process A has it.
That's why the first bug fix isn't effective in this case: it relied on
the fact that the process which lingers on the deleted
`rabbit_ff_registry` is the process which attempts to purge the module.
[How]
In this commit, we go with a more drastic change. This time, we put a
wrapper in front of `rabbit_ff_registry` called
`rabbit_ff_registry_wrapper`. This wrapper is responsible for doing the
automatic initialization if the loaded registry is the stub module. The
`rabbit_ff_registry` stub now always returns `init_required` instead of
performing the initialization and calling itself recursively.
This way, processes linger on `rabbit_ff_registry_wrapper`, not on
`rabbit_ff_registry`. Thanks to this, the Code server can proceed with
the purge.
See #8112.
Diffstat (limited to 'deps/rabbit/src/rabbit_feature_flags.erl')
-rw-r--r-- | deps/rabbit/src/rabbit_feature_flags.erl | 30 |
1 files changed, 19 insertions, 11 deletions
diff --git a/deps/rabbit/src/rabbit_feature_flags.erl b/deps/rabbit/src/rabbit_feature_flags.erl index 3f35fc47a9..06e46cc03a 100644 --- a/deps/rabbit/src/rabbit_feature_flags.erl +++ b/deps/rabbit/src/rabbit_feature_flags.erl @@ -113,7 +113,7 @@ remote_nodes/0, running_remote_nodes/0, does_node_support/3, - inject_test_feature_flags/1, + inject_test_feature_flags/1, inject_test_feature_flags/2, query_supported_feature_flags/0, does_enabled_feature_flags_list_file_exist/0, read_enabled_feature_flags_list/0, @@ -336,8 +336,8 @@ list() -> list(all). %% `disabled'. %% @returns A map of selected feature flags. -list(all) -> rabbit_ff_registry:list(all); -list(enabled) -> rabbit_ff_registry:list(enabled); +list(all) -> rabbit_ff_registry_wrapper:list(all); +list(enabled) -> rabbit_ff_registry_wrapper:list(enabled); list(disabled) -> maps:filter( fun(FeatureName, _) -> is_disabled(FeatureName) end, list(all)). @@ -381,7 +381,7 @@ enable(FeatureNames) when is_list(FeatureNames) -> with_feature_flags(FeatureNames, fun enable/1). uses_callbacks(FeatureName) when is_atom(FeatureName) -> - case rabbit_ff_registry:get(FeatureName) of + case rabbit_ff_registry_wrapper:get(FeatureName) of undefined -> false; FeatureProps -> uses_callbacks(FeatureProps) end; @@ -504,9 +504,11 @@ is_supported(FeatureNames, Timeout) -> %% `false' if one of them is not. is_supported_locally(FeatureName) when is_atom(FeatureName) -> - rabbit_ff_registry:is_supported(FeatureName); + rabbit_ff_registry_wrapper:is_supported(FeatureName); is_supported_locally(FeatureNames) when is_list(FeatureNames) -> - lists:all(fun(F) -> rabbit_ff_registry:is_supported(F) end, FeatureNames). + lists:all( + fun(F) -> rabbit_ff_registry_wrapper:is_supported(F) end, + FeatureNames). -spec is_enabled(feature_name() | [feature_name()]) -> boolean(). %% @doc @@ -563,19 +565,19 @@ is_enabled(FeatureNames, blocking) -> end. is_enabled_nb(FeatureName) when is_atom(FeatureName) -> - rabbit_ff_registry:is_enabled(FeatureName); + rabbit_ff_registry_wrapper:is_enabled(FeatureName); is_enabled_nb(FeatureNames) when is_list(FeatureNames) -> lists:foldl( fun (_F, state_changing = Acc) -> Acc; (F, false = Acc) -> - case rabbit_ff_registry:is_enabled(F) of + case rabbit_ff_registry_wrapper:is_enabled(F) of state_changing -> state_changing; _ -> Acc end; (F, _) -> - rabbit_ff_registry:is_enabled(F) + rabbit_ff_registry_wrapper:is_enabled(F) end, true, FeatureNames). @@ -710,7 +712,7 @@ get_state(FeatureName) when is_atom(FeatureName) -> %% given feature flag name doesn't correspond to a known feature flag. get_stability(FeatureName) when is_atom(FeatureName) -> - case rabbit_ff_registry:get(FeatureName) of + case rabbit_ff_registry_wrapper:get(FeatureName) of undefined -> undefined; FeatureProps -> get_stability(FeatureProps) end; @@ -738,6 +740,9 @@ init() -> -define(PT_TESTSUITE_ATTRS, {?MODULE, testsuite_feature_flags_attrs}). inject_test_feature_flags(FeatureFlags) -> + inject_test_feature_flags(FeatureFlags, true). + +inject_test_feature_flags(FeatureFlags, InitReg) -> ExistingAppAttrs = module_attributes_from_testsuite(), FeatureFlagsPerApp0 = lists:foldl( fun({Origin, Origin, FFlags}, Acc) -> @@ -768,7 +773,10 @@ inject_test_feature_flags(FeatureFlags) -> [FeatureFlags, AttributesFromTestsuite], #{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}), ok = persistent_term:put(?PT_TESTSUITE_ATTRS, AttributesFromTestsuite), - rabbit_ff_registry_factory:initialize_registry(). + case InitReg of + true -> rabbit_ff_registry_factory:initialize_registry(); + false -> ok + end. module_attributes_from_testsuite() -> persistent_term:get(?PT_TESTSUITE_ATTRS, []). |