summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Klishin <klishinm@vmware.com> 2022-08-08 11:12:49 +0400
committer: GitHub <noreply@github.com> 2022-08-08 11:12:49 +0400
commit: ff8dfda022dbf2b7e301b88041040f39fa92aa03 (patch)
tree: 9a192032bbe4434ea7f1429d1116911faf43b77f
parent: 23e7fc860b659f9c5eb3f5c99c1c85df632982ba (diff)
parent: 8070344a38b5d3efb2e6687c73e0a163c12bd5aa (diff)
download: rabbitmq-server-git-ff8dfda022dbf2b7e301b88041040f39fa92aa03.tar.gz
Merge pull request #5442 from rabbitmq/prevent-overlapping-partitions
Set kernel param prevent_overlapping_partitions to true
-rw-r--r-- deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_prelaunch_conf.erl 11
-rwxr-xr-x deps/rabbit/scripts/rabbitmq-server 2
-rw-r--r-- deps/rabbit/scripts/rabbitmq-server.bat 2
-rw-r--r-- deps/rabbit/scripts/rabbitmq-service.bat 2
-rw-r--r-- deps/rabbit/src/rabbit_ff_controller.erl 2
-rw-r--r-- deps/rabbit/src/rabbit_mnesia.erl 3
-rw-r--r-- deps/rabbit/src/rabbit_node_monitor.erl 70
7 files changed, 10 insertions, 82 deletions
diff --git a/deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_prelaunch_conf.erl b/deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_prelaunch_conf.erl
index 9e866c4f16..2980301b7b 100644
--- a/deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_prelaunch_conf.erl
+++ b/deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_prelaunch_conf.erl
@@ -24,7 +24,7 @@ setup(Context) ->
%% TODO: Check if directories/files are inside Mnesia dir.
ok = set_default_config(),
- ok = disable_kernel_overlapping_partitions(),
+ ok = enable_kernel_overlapping_partitions(),
AdditionalConfigFiles = find_additional_config_files(Context),
AdvancedConfigFile = find_actual_advanced_config_file(Context),
@@ -575,8 +575,7 @@ get_input_iodevice() ->
end
end.
-disable_kernel_overlapping_partitions() ->
- %% This new "fixed" behavior seriously affects our own partition handling,
- %% and potentially even libraries such as Aten and Ra,
- %% so disable this to be forward-compatible with Erlang 25
- application:set_env(kernel, prevent_overlapping_partitions, false).
+enable_kernel_overlapping_partitions() ->
+ %% Kernel parameter prevent_overlapping_partitions got introduced
+ %% in Erlang 24.3 and is set to `true` by default in Erlang 25.
+ application:set_env(kernel, prevent_overlapping_partitions, true).
diff --git a/deps/rabbit/scripts/rabbitmq-server b/deps/rabbit/scripts/rabbitmq-server
index c9612d8946..303da6f534 100755
--- a/deps/rabbit/scripts/rabbitmq-server
+++ b/deps/rabbit/scripts/rabbitmq-server
@@ -81,7 +81,7 @@ start_rabbitmq_server() {
${RABBITMQ_SERVER_START_ARGS} \
-syslog logger '[]' \
-syslog syslog_error_logger false \
- -kernel prevent_overlapping_partitions false \
+ -kernel prevent_overlapping_partitions true \
"$@"
}
diff --git a/deps/rabbit/scripts/rabbitmq-server.bat b/deps/rabbit/scripts/rabbitmq-server.bat
index 970a101479..19a250707f 100644
--- a/deps/rabbit/scripts/rabbitmq-server.bat
+++ b/deps/rabbit/scripts/rabbitmq-server.bat
@@ -70,7 +70,7 @@ if "!RABBITMQ_ALLOW_INPUT!"=="" (
!RABBITMQ_SERVER_START_ARGS! ^
-syslog logger [] ^
-syslog syslog_error_logger false ^
--kernel prevent_overlapping_partitions false ^
+-kernel prevent_overlapping_partitions true ^
!STAR!
if ERRORLEVEL 1 (
diff --git a/deps/rabbit/scripts/rabbitmq-service.bat b/deps/rabbit/scripts/rabbitmq-service.bat
index 7870417b46..779bfc1ec2 100644
--- a/deps/rabbit/scripts/rabbitmq-service.bat
+++ b/deps/rabbit/scripts/rabbitmq-service.bat
@@ -200,7 +200,7 @@ set ERLANG_SERVICE_ARGUMENTS= ^
!RABBITMQ_DIST_ARG! ^
-syslog logger [] ^
-syslog syslog_error_logger false ^
--kernel prevent_overlapping_partitions false ^
+-kernel prevent_overlapping_partitions true ^
!STARVAR!
set ERLANG_SERVICE_ARGUMENTS=!ERLANG_SERVICE_ARGUMENTS:\=\\!
diff --git a/deps/rabbit/src/rabbit_ff_controller.erl b/deps/rabbit/src/rabbit_ff_controller.erl
index 7b005c5db7..f8dd874dc6 100644
--- a/deps/rabbit/src/rabbit_ff_controller.erl
+++ b/deps/rabbit/src/rabbit_ff_controller.erl
@@ -268,7 +268,7 @@ register_globally() ->
"Feature flags: [global sync] @ ~s",
[node()],
#{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}),
- ok = rabbit_node_monitor:global_sync(),
+ ok = global:sync(),
?LOG_DEBUG(
"Feature flags: [global register] @ ~s",
[node()],
diff --git a/deps/rabbit/src/rabbit_mnesia.erl b/deps/rabbit/src/rabbit_mnesia.erl
index 66362bdfc7..e802065b57 100644
--- a/deps/rabbit/src/rabbit_mnesia.erl
+++ b/deps/rabbit/src/rabbit_mnesia.erl
@@ -84,8 +84,7 @@ init() ->
%% We intuitively expect the global name server to be synced when
%% Mnesia is up. In fact that's not guaranteed to be the case -
%% let's make it so.
- ok = rabbit_node_monitor:global_sync(),
- ok.
+ ok = global:sync().
init_with_lock() ->
{Retries, Timeout} = rabbit_peer_discovery:locking_retry_timeout(),
diff --git a/deps/rabbit/src/rabbit_node_monitor.erl b/deps/rabbit/src/rabbit_node_monitor.erl
index 447dde637f..6955307cac 100644
--- a/deps/rabbit/src/rabbit_node_monitor.erl
+++ b/deps/rabbit/src/rabbit_node_monitor.erl
@@ -19,7 +19,6 @@
-export([notify_node_up/0, notify_joined_cluster/0, notify_left_cluster/1]).
-export([partitions/0, partitions/1, status/1, subscribe/1]).
-export([pause_partition_guard/0]).
--export([global_sync/0]).
%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
@@ -269,75 +268,6 @@ pause_if_all_down_guard(PreferredNodes, LastNodes, LastState) ->
end.
%%----------------------------------------------------------------------------
-%% "global" hang workaround.
-%%----------------------------------------------------------------------------
-
-%% This code works around a possible inconsistency in the "global"
-%% state, causing global:sync/0 to never return.
-%%
-%% 1. A process is spawned.
-%% 2. If after 10", global:sync() didn't return, the "global"
-%% state is parsed.
-%% 3. If it detects that a sync is blocked for more than 10",
-%% the process sends fake nodedown/nodeup events to the two
-%% nodes involved (one local, one remote).
-%% 4. Both "global" instances restart their synchronisation.
-%% 5. global:sync() finally returns.
-%%
-%% FIXME: Remove this workaround, once we got rid of the change to
-%% "dist_auto_connect" and fixed the bugs uncovered.
-
-global_sync() ->
- Pid = spawn(fun workaround_global_hang/0),
- ok = global:sync(),
- Pid ! global_sync_done,
- ok.
-
-workaround_global_hang() ->
- receive
- global_sync_done ->
- ok
- after 10_000 ->
- find_blocked_global_peers()
- end.
-
-find_blocked_global_peers() ->
- Snapshot1 = snapshot_global_dict(),
- timer:sleep(10_000),
- Snapshot2 = snapshot_global_dict(),
- find_blocked_global_peers1(Snapshot2, Snapshot1).
-
-snapshot_global_dict() ->
- {status, _, _, [Dict | _]} = sys:get_status(global_name_server),
- [E || {{sync_tag_his, _}, _} = E <- Dict].
-
-find_blocked_global_peers1([{{sync_tag_his, Peer}, _} = Item | Rest],
- OlderSnapshot) ->
- case lists:member(Item, OlderSnapshot) of
- true -> unblock_global_peer(Peer);
- false -> ok
- end,
- find_blocked_global_peers1(Rest, OlderSnapshot);
-find_blocked_global_peers1([], _) ->
- ok.
-
-unblock_global_peer(PeerNode) ->
- ThisNode = node(),
- PeerState = rpc:call(PeerNode, sys, get_status, [global_name_server]),
- logger:debug(
- "Global hang workaround: global state on ~s seems inconsistent~n"
- " * Peer global state: ~p~n"
- " * Local global state: ~p~n"
- "Faking nodedown/nodeup between ~s and ~s",
- [PeerNode, PeerState, sys:get_status(global_name_server),
- PeerNode, ThisNode]),
- {global_name_server, ThisNode} ! {nodedown, PeerNode},
- {global_name_server, PeerNode} ! {nodedown, ThisNode},
- {global_name_server, ThisNode} ! {nodeup, PeerNode},
- {global_name_server, PeerNode} ! {nodeup, ThisNode},
- ok.
-
-%%----------------------------------------------------------------------------
%% gen_server callbacks
%%----------------------------------------------------------------------------