summaryrefslogtreecommitdiff
path: root/deps/rabbit/src/rabbit_feature_flags.erl
diff options
context:
space:
mode:
authorJean-Sébastien Pédron <jean-sebastien.pedron@dumbbell.fr>2023-05-12 17:11:57 +0200
committerJean-Sébastien Pédron <jean-sebastien.pedron@dumbbell.fr>2023-05-16 10:46:32 +0200
commit213eb20e1658a1cebb4c1a07606e28fc7b62ebca (patch)
treef7ae3a58697bb94120e5333e21e748b30a8be4cb /deps/rabbit/src/rabbit_feature_flags.erl
parentc7d0427d621493f00a3aa0d6e7720f709f3769a6 (diff)
downloadrabbitmq-server-git-fix-feature-flags-init-vs-code_server-deadlock-take2.tar.gz
rabbit_feature_flags: Fix possible deadlock when calling the Code server (take 2)fix-feature-flags-init-vs-code_server-deadlock-take2
[Why] The background reason for this fix is about the same as the one explained in the previous version of this fix; see commit e0a2f1027278bd0901bdaa9af6065abc676ff14f. This time, the order of events that led to a similar deadlock is the following: 0. No `rabbit_ff_registry` is loaded yet. 1. Process A, B and C call `rabbit_ff_registry:something()` indirectly which triggers two initializations in parallel. * Process A did it from an explicit call to `rabbit_ff_registry_factory:initialize_factory()` during RabbitMQ boot. * Process B and C indirectly called it because they checked if a feature flag was enabled. 2. Process B acquires the lock first and finishes the initialization. A new registry is loaded and the old `rabbit_ff_registry` module copy is marked as "old". At this point, process A and C still reference that old copy because `rabbit_ff_registry:something()` is up above in its call stack. 3. Process A acquires the lock, prepares the new registry and tries to soft-purge the old `rabbit_ff_registry` copy before loading the new one. This is where the deadlock happens: process A requests the Code server to purge the old copy, but the Code server waits for process C to stop using it. The difference between the steps described in the first bug fix attempt's commit and these ones is that the process which lingers on the deleted `rabbit_ff_registry` (process C above) isn't the one who acquired the lock; process A has it. That's why the first bug fix isn't effective in this case: it relied on the fact that the process which lingers on the deleted `rabbit_ff_registry` is the process which attempts to purge the module. [How] In this commit, we go with a more drastic change. This time, we put a wrapper in front of `rabbit_ff_registry` called `rabbit_ff_registry_wrapper`. This wrapper is responsible for doing the automatic initialization if the loaded registry is the stub module. The `rabbit_ff_registry` stub now always returns `init_required` instead of performing the initialization and calling itself recursively. This way, processes linger on `rabbit_ff_registry_wrapper`, not on `rabbit_ff_registry`. Thanks to this, the Code server can proceed with the purge. See #8112.
Diffstat (limited to 'deps/rabbit/src/rabbit_feature_flags.erl')
-rw-r--r--deps/rabbit/src/rabbit_feature_flags.erl30
1 files changed, 19 insertions, 11 deletions
diff --git a/deps/rabbit/src/rabbit_feature_flags.erl b/deps/rabbit/src/rabbit_feature_flags.erl
index 3f35fc47a9..06e46cc03a 100644
--- a/deps/rabbit/src/rabbit_feature_flags.erl
+++ b/deps/rabbit/src/rabbit_feature_flags.erl
@@ -113,7 +113,7 @@
remote_nodes/0,
running_remote_nodes/0,
does_node_support/3,
- inject_test_feature_flags/1,
+ inject_test_feature_flags/1, inject_test_feature_flags/2,
query_supported_feature_flags/0,
does_enabled_feature_flags_list_file_exist/0,
read_enabled_feature_flags_list/0,
@@ -336,8 +336,8 @@ list() -> list(all).
%% `disabled'.
%% @returns A map of selected feature flags.
-list(all) -> rabbit_ff_registry:list(all);
-list(enabled) -> rabbit_ff_registry:list(enabled);
+list(all) -> rabbit_ff_registry_wrapper:list(all);
+list(enabled) -> rabbit_ff_registry_wrapper:list(enabled);
list(disabled) -> maps:filter(
fun(FeatureName, _) -> is_disabled(FeatureName) end,
list(all)).
@@ -381,7 +381,7 @@ enable(FeatureNames) when is_list(FeatureNames) ->
with_feature_flags(FeatureNames, fun enable/1).
uses_callbacks(FeatureName) when is_atom(FeatureName) ->
- case rabbit_ff_registry:get(FeatureName) of
+ case rabbit_ff_registry_wrapper:get(FeatureName) of
undefined -> false;
FeatureProps -> uses_callbacks(FeatureProps)
end;
@@ -504,9 +504,11 @@ is_supported(FeatureNames, Timeout) ->
%% `false' if one of them is not.
is_supported_locally(FeatureName) when is_atom(FeatureName) ->
- rabbit_ff_registry:is_supported(FeatureName);
+ rabbit_ff_registry_wrapper:is_supported(FeatureName);
is_supported_locally(FeatureNames) when is_list(FeatureNames) ->
- lists:all(fun(F) -> rabbit_ff_registry:is_supported(F) end, FeatureNames).
+ lists:all(
+ fun(F) -> rabbit_ff_registry_wrapper:is_supported(F) end,
+ FeatureNames).
-spec is_enabled(feature_name() | [feature_name()]) -> boolean().
%% @doc
@@ -563,19 +565,19 @@ is_enabled(FeatureNames, blocking) ->
end.
is_enabled_nb(FeatureName) when is_atom(FeatureName) ->
- rabbit_ff_registry:is_enabled(FeatureName);
+ rabbit_ff_registry_wrapper:is_enabled(FeatureName);
is_enabled_nb(FeatureNames) when is_list(FeatureNames) ->
lists:foldl(
fun
(_F, state_changing = Acc) ->
Acc;
(F, false = Acc) ->
- case rabbit_ff_registry:is_enabled(F) of
+ case rabbit_ff_registry_wrapper:is_enabled(F) of
state_changing -> state_changing;
_ -> Acc
end;
(F, _) ->
- rabbit_ff_registry:is_enabled(F)
+ rabbit_ff_registry_wrapper:is_enabled(F)
end,
true, FeatureNames).
@@ -710,7 +712,7 @@ get_state(FeatureName) when is_atom(FeatureName) ->
%% given feature flag name doesn't correspond to a known feature flag.
get_stability(FeatureName) when is_atom(FeatureName) ->
- case rabbit_ff_registry:get(FeatureName) of
+ case rabbit_ff_registry_wrapper:get(FeatureName) of
undefined -> undefined;
FeatureProps -> get_stability(FeatureProps)
end;
@@ -738,6 +740,9 @@ init() ->
-define(PT_TESTSUITE_ATTRS, {?MODULE, testsuite_feature_flags_attrs}).
inject_test_feature_flags(FeatureFlags) ->
+ inject_test_feature_flags(FeatureFlags, true).
+
+inject_test_feature_flags(FeatureFlags, InitReg) ->
ExistingAppAttrs = module_attributes_from_testsuite(),
FeatureFlagsPerApp0 = lists:foldl(
fun({Origin, Origin, FFlags}, Acc) ->
@@ -768,7 +773,10 @@ inject_test_feature_flags(FeatureFlags) ->
[FeatureFlags, AttributesFromTestsuite],
#{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}),
ok = persistent_term:put(?PT_TESTSUITE_ATTRS, AttributesFromTestsuite),
- rabbit_ff_registry_factory:initialize_registry().
+ case InitReg of
+ true -> rabbit_ff_registry_factory:initialize_registry();
+ false -> ok
+ end.
module_attributes_from_testsuite() ->
persistent_term:get(?PT_TESTSUITE_ATTRS, []).