diff options
author | Michael Klishin <klishinm@vmware.com> | 2022-08-08 11:12:49 +0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-08 11:12:49 +0400 |
commit | ff8dfda022dbf2b7e301b88041040f39fa92aa03 (patch) | |
tree | 9a192032bbe4434ea7f1429d1116911faf43b77f | |
parent | 23e7fc860b659f9c5eb3f5c99c1c85df632982ba (diff) | |
parent | 8070344a38b5d3efb2e6687c73e0a163c12bd5aa (diff) | |
download | rabbitmq-server-git-ff8dfda022dbf2b7e301b88041040f39fa92aa03.tar.gz |
Merge pull request #5442 from rabbitmq/prevent-overlapping-partitions
Set kernel param prevent_overlapping_partitions to true
-rw-r--r-- | deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_prelaunch_conf.erl | 11 | ||||
-rwxr-xr-x | deps/rabbit/scripts/rabbitmq-server | 2 | ||||
-rw-r--r-- | deps/rabbit/scripts/rabbitmq-server.bat | 2 | ||||
-rw-r--r-- | deps/rabbit/scripts/rabbitmq-service.bat | 2 | ||||
-rw-r--r-- | deps/rabbit/src/rabbit_ff_controller.erl | 2 | ||||
-rw-r--r-- | deps/rabbit/src/rabbit_mnesia.erl | 3 | ||||
-rw-r--r-- | deps/rabbit/src/rabbit_node_monitor.erl | 70 |
7 files changed, 10 insertions, 82 deletions
diff --git a/deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_prelaunch_conf.erl b/deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_prelaunch_conf.erl index 9e866c4f16..2980301b7b 100644 --- a/deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_prelaunch_conf.erl +++ b/deps/rabbit/apps/rabbitmq_prelaunch/src/rabbit_prelaunch_conf.erl @@ -24,7 +24,7 @@ setup(Context) -> %% TODO: Check if directories/files are inside Mnesia dir. ok = set_default_config(), - ok = disable_kernel_overlapping_partitions(), + ok = enable_kernel_overlapping_partitions(), AdditionalConfigFiles = find_additional_config_files(Context), AdvancedConfigFile = find_actual_advanced_config_file(Context), @@ -575,8 +575,7 @@ get_input_iodevice() -> end end. -disable_kernel_overlapping_partitions() -> - %% This new "fixed" behavior seriously affects our own partition handling, - %% and potentially even libraries such as Aten and Ra, - %% so disable this to be forward-compatible with Erlang 25 - application:set_env(kernel, prevent_overlapping_partitions, false). +enable_kernel_overlapping_partitions() -> + %% Kernel parameter prevent_overlapping_partitions got introduced + %% in Erlang 24.3 and is set to `true` by default in Erlang 25. + application:set_env(kernel, prevent_overlapping_partitions, true). diff --git a/deps/rabbit/scripts/rabbitmq-server b/deps/rabbit/scripts/rabbitmq-server index c9612d8946..303da6f534 100755 --- a/deps/rabbit/scripts/rabbitmq-server +++ b/deps/rabbit/scripts/rabbitmq-server @@ -81,7 +81,7 @@ start_rabbitmq_server() { ${RABBITMQ_SERVER_START_ARGS} \ -syslog logger '[]' \ -syslog syslog_error_logger false \ - -kernel prevent_overlapping_partitions false \ + -kernel prevent_overlapping_partitions true \ "$@" } diff --git a/deps/rabbit/scripts/rabbitmq-server.bat b/deps/rabbit/scripts/rabbitmq-server.bat index 970a101479..19a250707f 100644 --- a/deps/rabbit/scripts/rabbitmq-server.bat +++ b/deps/rabbit/scripts/rabbitmq-server.bat @@ -70,7 +70,7 @@ if "!RABBITMQ_ALLOW_INPUT!"=="" ( !RABBITMQ_SERVER_START_ARGS! ^
-syslog logger [] ^
-syslog syslog_error_logger false ^
--kernel prevent_overlapping_partitions false ^
+-kernel prevent_overlapping_partitions true ^
!STAR!
if ERRORLEVEL 1 (
diff --git a/deps/rabbit/scripts/rabbitmq-service.bat b/deps/rabbit/scripts/rabbitmq-service.bat index 7870417b46..779bfc1ec2 100644 --- a/deps/rabbit/scripts/rabbitmq-service.bat +++ b/deps/rabbit/scripts/rabbitmq-service.bat @@ -200,7 +200,7 @@ set ERLANG_SERVICE_ARGUMENTS= ^ !RABBITMQ_DIST_ARG! ^
-syslog logger [] ^
-syslog syslog_error_logger false ^
--kernel prevent_overlapping_partitions false ^
+-kernel prevent_overlapping_partitions true ^
!STARVAR!
set ERLANG_SERVICE_ARGUMENTS=!ERLANG_SERVICE_ARGUMENTS:\=\\!
diff --git a/deps/rabbit/src/rabbit_ff_controller.erl b/deps/rabbit/src/rabbit_ff_controller.erl index 7b005c5db7..f8dd874dc6 100644 --- a/deps/rabbit/src/rabbit_ff_controller.erl +++ b/deps/rabbit/src/rabbit_ff_controller.erl @@ -268,7 +268,7 @@ register_globally() -> "Feature flags: [global sync] @ ~s", [node()], #{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}), - ok = rabbit_node_monitor:global_sync(), + ok = global:sync(), ?LOG_DEBUG( "Feature flags: [global register] @ ~s", [node()], diff --git a/deps/rabbit/src/rabbit_mnesia.erl b/deps/rabbit/src/rabbit_mnesia.erl index 66362bdfc7..e802065b57 100644 --- a/deps/rabbit/src/rabbit_mnesia.erl +++ b/deps/rabbit/src/rabbit_mnesia.erl @@ -84,8 +84,7 @@ init() -> %% We intuitively expect the global name server to be synced when %% Mnesia is up. In fact that's not guaranteed to be the case - %% let's make it so. - ok = rabbit_node_monitor:global_sync(), - ok. + ok = global:sync(). init_with_lock() -> {Retries, Timeout} = rabbit_peer_discovery:locking_retry_timeout(), diff --git a/deps/rabbit/src/rabbit_node_monitor.erl b/deps/rabbit/src/rabbit_node_monitor.erl index 447dde637f..6955307cac 100644 --- a/deps/rabbit/src/rabbit_node_monitor.erl +++ b/deps/rabbit/src/rabbit_node_monitor.erl @@ -19,7 +19,6 @@ -export([notify_node_up/0, notify_joined_cluster/0, notify_left_cluster/1]). -export([partitions/0, partitions/1, status/1, subscribe/1]). -export([pause_partition_guard/0]). --export([global_sync/0]). %% gen_server callbacks -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, @@ -269,75 +268,6 @@ pause_if_all_down_guard(PreferredNodes, LastNodes, LastState) -> end. %%---------------------------------------------------------------------------- -%% "global" hang workaround. -%%---------------------------------------------------------------------------- - -%% This code works around a possible inconsistency in the "global" -%% state, causing global:sync/0 to never return. -%% -%% 1. A process is spawned. -%% 2. If after 10", global:sync() didn't return, the "global" -%% state is parsed. -%% 3. If it detects that a sync is blocked for more than 10", -%% the process sends fake nodedown/nodeup events to the two -%% nodes involved (one local, one remote). -%% 4. Both "global" instances restart their synchronisation. -%% 5. global:sync() finally returns. -%% -%% FIXME: Remove this workaround, once we got rid of the change to -%% "dist_auto_connect" and fixed the bugs uncovered. - -global_sync() -> - Pid = spawn(fun workaround_global_hang/0), - ok = global:sync(), - Pid ! global_sync_done, - ok. - -workaround_global_hang() -> - receive - global_sync_done -> - ok - after 10_000 -> - find_blocked_global_peers() - end. - -find_blocked_global_peers() -> - Snapshot1 = snapshot_global_dict(), - timer:sleep(10_000), - Snapshot2 = snapshot_global_dict(), - find_blocked_global_peers1(Snapshot2, Snapshot1). - -snapshot_global_dict() -> - {status, _, _, [Dict | _]} = sys:get_status(global_name_server), - [E || {{sync_tag_his, _}, _} = E <- Dict]. - -find_blocked_global_peers1([{{sync_tag_his, Peer}, _} = Item | Rest], - OlderSnapshot) -> - case lists:member(Item, OlderSnapshot) of - true -> unblock_global_peer(Peer); - false -> ok - end, - find_blocked_global_peers1(Rest, OlderSnapshot); -find_blocked_global_peers1([], _) -> - ok. - -unblock_global_peer(PeerNode) -> - ThisNode = node(), - PeerState = rpc:call(PeerNode, sys, get_status, [global_name_server]), - logger:debug( - "Global hang workaround: global state on ~s seems inconsistent~n" - " * Peer global state: ~p~n" - " * Local global state: ~p~n" - "Faking nodedown/nodeup between ~s and ~s", - [PeerNode, PeerState, sys:get_status(global_name_server), - PeerNode, ThisNode]), - {global_name_server, ThisNode} ! {nodedown, PeerNode}, - {global_name_server, PeerNode} ! {nodedown, ThisNode}, - {global_name_server, ThisNode} ! {nodeup, PeerNode}, - {global_name_server, PeerNode} ! {nodeup, ThisNode}, - ok. - -%%---------------------------------------------------------------------------- %% gen_server callbacks %%---------------------------------------------------------------------------- |