diff options
author | Simon MacMullen <simon@rabbitmq.com> | 2014-10-03 17:50:18 +0100 |
---|---|---|
committer | Simon MacMullen <simon@rabbitmq.com> | 2014-10-03 17:50:18 +0100 |
commit | 9bd6ff0c68babf3d58bf7537b0519786327c754b (patch) | |
tree | e4900187d2384b0c3291e9085d4c8d1946e41334 | |
parent | acb8c43655b5149e4c5259433a4b9765c864a840 (diff) | |
download | rabbitmq-server-9bd6ff0c68babf3d58bf7537b0519786327c754b.tar.gz |
Just because we received a running_partitioned_network event doesn't mean all nodes are now contactable. Defer attempting autoheal until we can talk to everyone again, to avoid getting stuck in a loop with partial partition promotion. (bug26213)
-rw-r--r-- | src/rabbit_node_monitor.erl | 19 |
1 files changed, 13 insertions, 6 deletions
diff --git a/src/rabbit_node_monitor.erl b/src/rabbit_node_monitor.erl
index b4a429ed..f3aa2303 100644
--- a/src/rabbit_node_monitor.erl
+++ b/src/rabbit_node_monitor.erl
@@ -330,8 +330,8 @@ handle_cast({node_up, Node, NodeType},
                                   end,
                                   add_node(Node, RunningNodes)}),
             ok = handle_live_rabbit(Node),
-            {noreply, State#state{
-                        monitors = pmon:monitor({rabbit, Node}, Monitors)}}
+            Monitors1 = pmon:monitor({rabbit, Node}, Monitors),
+            {noreply, maybe_autoheal(State#state{monitors = Monitors1})}
     end;
 
 handle_cast({joined_cluster, Node, NodeType}, State) ->
@@ -393,8 +393,7 @@ handle_info({nodedown, Node, Info}, State = #state{node_guids = GUIDs}) ->
 handle_info({mnesia_system_event,
              {inconsistent_database, running_partitioned_network, Node}},
             State = #state{partitions = Partitions,
-                           monitors = Monitors,
-                           autoheal = AState}) ->
+                           monitors = Monitors}) ->
     %% We will not get a node_up from this node - yet we should treat it as
     %% up (mostly).
     State1 = case pmon:is_monitored({rabbit, Node}, Monitors) of
@@ -405,8 +404,7 @@ handle_info({mnesia_system_event,
     ok = handle_live_rabbit(Node),
     Partitions1 = ordsets:to_list(
                     ordsets:add_element(Node, ordsets:from_list(Partitions))),
-    {noreply, State1#state{partitions = Partitions1,
-                           autoheal = rabbit_autoheal:maybe_start(AState)}};
+    {noreply, maybe_autoheal(State1#state{partitions = Partitions1})};
 
 handle_info({autoheal_msg, Msg},
             State = #state{autoheal = AState, partitions = Partitions}) ->
@@ -549,6 +547,15 @@ handle_live_rabbit(Node) ->
     ok = rabbit_alarm:on_node_up(Node),
     ok = rabbit_mnesia:on_node_up(Node).
 
+maybe_autoheal(State = #state{partitions = []}) ->
+    State;
+
+maybe_autoheal(State = #state{autoheal = AState}) ->
+    case all_nodes_up() of
+        true -> State#state{autoheal = rabbit_autoheal:maybe_start(AState)};
+        false -> State
+    end.
+
 %%--------------------------------------------------------------------
 %% Internal utils
 %%--------------------------------------------------------------------