diff options
author | Michael Klishin <mklishin@pivotal.io> | 2020-01-13 20:42:21 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-01-13 20:42:21 +0300 |
commit | 5576cbc781dc1e2eeb1d1d3773ad6d60daacdec7 (patch) | |
tree | 6d3b1b732b3254d7532e0d75766968a25ad04509 | |
parent | 5d32f7edc9afbc0f397e37b25e482071cee00c70 (diff) | |
parent | 8ff98742d0d8ed5b64233d1bb9cf63b5a0c0d0e4 (diff) | |
download | rabbitmq-server-git-5576cbc781dc1e2eeb1d1d3773ad6d60daacdec7.tar.gz |
Merge pull request #2209 from rabbitmq/tomyouyou-tomyouyou-autoheal-l
Autoheal: the "autoheal_safe_to_start" state transition is not guaranteed to arrive on time
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | src/rabbit_autoheal.erl | 6 | ||||
-rw-r--r-- | test/partitions_SUITE.erl | 5 |
3 files changed, 10 insertions, 2 deletions
@@ -56,6 +56,7 @@ define PROJECT_ENV
      {reverse_dns_lookups, false},
      {cluster_partition_handling, ignore},
      {cluster_keepalive_interval, 10000},
+     {autoheal_state_transition_timeout, 60000},
      {tcp_listen_options, [{backlog, 128},
                            {nodelay, true},
                            {linger, {true, 0}},
diff --git a/src/rabbit_autoheal.erl b/src/rabbit_autoheal.erl
index 77165fc26c..316c8c89cb 100644
--- a/src/rabbit_autoheal.erl
+++ b/src/rabbit_autoheal.erl
@@ -372,6 +372,7 @@ wait_for_supervisors(Monitors) ->
 restart_loser(State, Winner) ->
     rabbit_log:warning(
       "Autoheal: we were selected to restart; winner is ~p~n", [Winner]),
+    NextStateTimeout = application:get_env(rabbit, autoheal_state_transition_timeout, 60000),
     rabbit_node_monitor:run_outside_applications(
       fun () ->
               MRef = erlang:monitor(process, {?SERVER, Winner}),
@@ -381,6 +382,11 @@ restart_loser(State, Winner) ->
                               not_healing;
                           autoheal_safe_to_start ->
                               State
+                  after NextStateTimeout ->
+                      rabbit_log:warning(
+                          "Autoheal: timed out waiting for a safe-to-start message from the winner (~p); will retry",
+                          [Winner]),
+                      not_healing
                   end,
               erlang:demonitor(MRef, [flush]),
               %% During the restart, the autoheal state is lost so we
diff --git a/test/partitions_SUITE.erl b/test/partitions_SUITE.erl
index 12a43b9fa6..1c7151d209 100644
--- a/test/partitions_SUITE.erl
+++ b/test/partitions_SUITE.erl
@@ -21,7 +21,7 @@
 
 -compile(export_all).
 
-%% We set ticktime to 1s and setuptime is 7s so to make sure it
+%% We set ticktime to 1s and setup time is 7s so to make sure it
 %% passes...
 -define(DELAY, 8000).
 
@@ -119,7 +119,7 @@ end_per_testcase(Testcase, Config) ->
     rabbit_ct_helpers:testcase_finished(Config1, Testcase).
 
 %% -------------------------------------------------------------------
-%% Testcases.
+%% Test cases.
 %% -------------------------------------------------------------------
 
 ignore(Config) ->
@@ -400,6 +400,7 @@ partial_pause_minority(Config) ->
     ok.
 
 partial_pause_if_all_down(Config) ->
+    rabbit_ct_broker_helpers:rpc_all(Config, application, set_env, [rabbit, autoheal_state_transition_timeout, 3000]),
     [A, B, C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
     set_mode(Config, {pause_if_all_down, [B], ignore}),
     block([{A, B}]),