log stale master pids properly, remove duplicate stale pid handling from rmq_misc

author: Tim Watson <tim@rabbitmq.com> 2012-07-23 11:26:32 +0100
committer: Tim Watson <tim@rabbitmq.com> 2012-07-23 11:26:32 +0100
commit: 0f6ba029bfef0650dce5625465d1914d47497c3b (patch)
tree: 4e81abc104cc6efc1067b9cca7e95bc186a06c00
parent: eca7e0725db080a249bca1720d06f5d2fb49e7cd (diff)
download: rabbitmq-server-0f6ba029bfef0650dce5625465d1914d47497c3b.tar.gz
2 files changed, 70 insertions, 108 deletions
diff --git a/src/rabbit_mirror_queue_misc.erl b/src/rabbit_mirror_queue_misc.erl
index f434f524..10e1b92b 100644
--- a/src/rabbit_mirror_queue_misc.erl
+++ b/src/rabbit_mirror_queue_misc.erl
@@ -138,39 +138,36 @@ add_mirror(Queue, MirrorNode) ->
                 [SPid] ->
                     case rabbit_misc:is_process_alive(SPid) of
                         true ->
-                            %% TODO: this condition is silently ignored - should
-                            %% we really be arriving at this state at all?
                             {error,{queue_already_mirrored_on_node,MirrorNode}};
                         false ->
-                            %% BUG-24942: we need to strip out this dead pid
-                            %% now, so we do so directly - perhaps we ought
-                            %% to start the txn sooner in order to get a more
-                            %% coarse grained lock though....
-                            %%
-                            %% BUG-24942: QUESTION - do we need to report that
-                            %% something has changed (either via gm or via
-                            %% the rabbit_event mechanisms) here?
-                            Q1 = Q#amqqueue{ slave_pids = (SPids -- [SPid]) },
-                            rabbit_misc:execute_mnesia_transaction(
-                                fun() ->
-                                    ok = rabbit_amqqueue:store_queue(Q1)
-                                end),
-                            start_child(Name, MirrorNode, Q1)
+                            %% See BUG-24942: we have a stale pid from an old
+                            %% incarnation of this node, because we've come
+                            %% back online faster than the node_down handling
+                            %% logic was able to deal with a death signal. We
+                            %% shall replace the stale pid, and the slave start
+                            %% logic handles this explicitly
+                            start_child(Name, MirrorNode, Q)
                     end
             end
         end).
 
 start_child(Name, MirrorNode, Q) ->
     case rabbit_mirror_queue_slave_sup:start_child(MirrorNode, [Q]) of
-        {ok, undefined} -> %% Already running
-                           ok;
-        {ok, SPid}      -> rabbit_log:info(
-                                "Adding mirror of ~s on node ~p: ~p~n",
-                                [rabbit_misc:rs(Name), MirrorNode, SPid]),
-                           ok;
-        Other           -> %% BUG-24942: should this not be checked for
-                           %% error conditions or something?
-                           Other
+        {ok, undefined} ->
+            %% NB: this means the mirror process was
+            %% already running on the given node.
+            ok;
+        {ok, SPid} ->
+            rabbit_log:info("Adding mirror of ~s on node ~p: ~p~n",
+                            [rabbit_misc:rs(Name), MirrorNode, SPid]),
+            ok;
+        {error, {stale_master_pid, StalePid}} ->
+            rabbit_log:warning("Detected stale HA master while adding "
+                               "mirror of ~s on node ~p: ~p~n",
+                               [rabbit_misc:rs(Name), MirrorNode, StalePid]),
+            ok;
+        Other ->
+            Other
     end.
 
 if_mirrored_queue(Queue, Fun) ->
diff --git a/src/rabbit_mirror_queue_slave.erl b/src/rabbit_mirror_queue_slave.erl
index 87fbfdfb..336ffa75 100644
--- a/src/rabbit_mirror_queue_slave.erl
+++ b/src/rabbit_mirror_queue_slave.erl
@@ -102,87 +102,52 @@ init(#amqqueue { name = QueueName } = Q) ->
     Self = self(),
     Node = node(),
     case rabbit_misc:execute_mnesia_transaction(
-           fun () ->
-                   [Q1 = #amqqueue { pid = QPid, slave_pids = MPids }] =
-                       mnesia:read({rabbit_queue, QueueName}),
-                   case [Pid || Pid <- [QPid | MPids], node(Pid) =:= Node] of
-                       [] ->
-                           MPids1 = MPids ++ [Self],
-                           ok = rabbit_amqqueue:store_queue(
-                                        Q1 #amqqueue { slave_pids = MPids1 }),
-                           {new, QPid};
-                       [MPid] when MPid =:= QPid ->
-                           case rabbit_misc:is_process_alive(MPid) of
-                               true ->
-                                   %% Well damn, this shouldn't really happen -
-                                   %% what this appears to mean is that this
-                                   %% node is attempting to start a slave, but
-                                   %% a pid already exists for this node and
-                                   %% it is *already* the master! This state
-                                   %% probably requires a bit more thought.
-                                   existing;
-                               false ->
-                                   %% We were the master, died, came back
-                                   %% online (but not after 3 days!) and
-                                   %% our death hasn't been registered by the
-                                   %% rest of our unbelieving flock yet!
-                                   %%
-                                   %% Actually, this is worse than it seems,
-                                   %% because the master is now down but the
-                                   %% pid in mnesia is from an old incarnation
-                                   %% of this node, so messages to it will be
-                                   %% silently dropped by Erlang (with some
-                                   %% helpful noise in the logs).
-                                   %%
-                                   %% I'm not sure how we're supposed to
-                                   %% recover from this. Won't messages get
-                                   %% lost, or at least lose their ordering
-                                   %% in this situation? Because a slave that
-                                   %% comes back online doesn't contain any
-                                   %% state (a slave will start off empty as
-                                   %% if they have no mirrored content at all)
-                                   %% then don't we fine ourselves in a slightly
-                                   %% inconsistent position here?
-                                   %%
-                                   %% In this scenario, I wonder whether we
-                                   %% should call init_with_backing_queue_state
-                                   %% to try and recover?
-                                   {stale, MPid}
-                           end;
-                       [SPid] ->
-                           case rabbit_misc:is_process_alive(SPid) of
-                               true ->
-                                    existing;
-                               false ->
-                                    %% we need to replace this pid as it
-                                    %% is stale, from an old incarnation 
-                                    %% of this node.
-                                    %%
-                                    %% NB: I *do* think we should completely
-                                    %% initialise this process before exiting
-                                    %% the mnesia txn in this case - once
-                                    %% we're *comitted* then I'd expect this
-                                    %% slave to become subject to any and all
-                                    %% invariants that members of the
-                                    %% slave_pids list should enforce, and
-                                    %% I'm *not* convinced this is possible
-                                    %% immediately after the txn commits
-                                    %% as the remaining initialisation code
-                                    %% could take arbitrary time to complete.
-
-                                    MPids1 = (MPids -- [SPid]) ++ [Self],
-                                    ok = rabbit_amqqueue:store_queue(
+        fun () ->
+            [Q1 = #amqqueue { pid = QPid, slave_pids = MPids }] =
+                mnesia:read({rabbit_queue, QueueName}),
+            case [Pid || Pid <- [QPid | MPids], node(Pid) =:= Node] of
+                [] ->
+                    MPids1 = MPids ++ [Self],
+                    ok = rabbit_amqqueue:store_queue(
+                                Q1 #amqqueue { slave_pids = MPids1 }),
+                    {new, QPid};
+                [MPid] when MPid =:= QPid ->
+                    case rabbit_misc:is_process_alive(MPid) of
+                        true ->
+                            %% What this appears to mean is that this
+                            %% node is attempting to start a slave, but
+                            %% a pid already exists for this node and
+                            %% it is *already* the master!
+                            %% This should never happen, so we fail noisily
+                            throw({invariant_failed,
+                                        {duplicate_live_master, Node}});
+                        false ->
+                            %% See bug24942: we have detected a stale
+                            %% master pid (from a previous incarnation
+                            %% of this node) which hasn't been detected
+                            %% via nodedown recovery. We cannot recover
+                            %% it here, so we bail and log the error.
+                            %% This does mean that this node is not a
+                            %% well behaving member of the HA configuration
+                            %% for this cluster and we have opened bug25074
+                            %% to address this situation explicitly.
+                            {stale, MPid}
+                    end;
+                [SPid] ->
+                    case rabbit_misc:is_process_alive(SPid) of
+                        true  -> existing;
+                        false -> %% See bug24942: we have detected a stale
+                                 %% slave pid (from a previous incarnation
+                                 %% of this node) which hasn't been detected
+                                 %% via nodedown recovery.
+                                 MPids1 = (MPids -- [SPid]) ++ [Self],
+                                 ok = rabbit_amqqueue:store_queue(
                                             Q1#amqqueue{slave_pids=MPids1}),
-                                    {new, QPid}
-                           end
-                   end
-           end) of
+                                 {new, QPid}
+                    end
+            end
+        end) of
         {new, MPid} ->
-            %% BUG-24942: *should* we move the whole initialisation process (bar
-            %% obviously the trap_exit and erlang:monitor/2 calls) into the
-            %% mnesia transaction? We could optionally return {ready, State}
-            %% from it, and in that case immediately return....
-
             process_flag(trap_exit, true), %% amqqueue_process traps exits too.
             {ok, GM} = gm:start_link(QueueName, ?MODULE, [self()]),
             receive {joined, GM} ->
@@ -218,10 +183,10 @@ init(#amqqueue { name = QueueName } = Q) ->
             {ok, State, hibernate,
              {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN,
               ?DESIRED_HIBERNATE}};
-        {stale, OldPid} ->
-            %% NB: placeholder for doing something actually useful here...
-            %% such as {error, {stale_master_pid, OldPid}}
-            ignore;
+        {stale, StalePid} ->
+            %% we cannot proceed if the master is stale, therefore we
+            %% fail to start and allow the error to be logged
+            {stop, {stale_master_pid, StalePid}};
         existing ->
             ignore
     end.
author	Tim Watson <tim@rabbitmq.com>	2012-07-23 11:26:32 +0100
committer	Tim Watson <tim@rabbitmq.com>	2012-07-23 11:26:32 +0100
commit	0f6ba029bfef0650dce5625465d1914d47497c3b (patch)
tree	4e81abc104cc6efc1067b9cca7e95bc186a06c00
parent	eca7e0725db080a249bca1720d06f5d2fb49e7cd (diff)
download	rabbitmq-server-0f6ba029bfef0650dce5625465d1914d47497c3b.tar.gz