summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVy Hong <vhong@amazon.com>2021-09-22 15:46:15 -0700
committermergify-bot <noreply@mergify.io>2021-09-30 02:14:17 +0000
commit227d7bc250e751ebc2e3c5177d1cf6c1cde19bff (patch)
treeb0acfe360d9dec30a42ed76801432fcb16dfbd9b
parent3c2246b434117f44fca728d2abdaa17db3d4473b (diff)
downloadrabbitmq-server-git-mergify/bp/v3.9.x/pr-3517.tar.gz
Reuse list of nodes in peer discovery plugins that use Erlang global locksmergify/bp/v3.9.x/pr-3517
AWS, Kubernetes and Classic peer discovery plugins use list_nodes and Erlang global:set_lock to create a mutex lock. To unlock, these plugins get the latest list with list_nodes and call global:del_lock. However, if list_nodes within unlock fails, RabbitMQ will throw an uncaught exception and the lock will not be released until the node holding the lock is restarted. This prevents new nodes from joining the cluster. This failure can be avoided by passing the list of nodes from lock to unlock. If a node goes away (and comes back) between the lock and unlock calls, del_lock could still successfully remove the lock. Similarly, if a new node starts up between the lock and unlock calls, del_lock wouldn't need to inform the new node. (cherry picked from commit 7090199330fc0603b480ac1f4395831d240c6a9b)
-rw-r--r--deps/rabbit/src/rabbit_peer_discovery_classic_config.erl11
-rw-r--r--deps/rabbitmq_peer_discovery_aws/src/rabbit_peer_discovery_aws.erl19
-rw-r--r--deps/rabbitmq_peer_discovery_aws/test/unit_SUITE.erl19
-rw-r--r--deps/rabbitmq_peer_discovery_k8s/src/rabbit_peer_discovery_k8s.erl19
-rw-r--r--deps/rabbitmq_peer_discovery_k8s/test/rabbitmq_peer_discovery_k8s_SUITE.erl16
5 files changed, 42 insertions, 42 deletions
diff --git a/deps/rabbit/src/rabbit_peer_discovery_classic_config.erl b/deps/rabbit/src/rabbit_peer_discovery_classic_config.erl
index cc62dd4dd5..ff46321b13 100644
--- a/deps/rabbit/src/rabbit_peer_discovery_classic_config.erl
+++ b/deps/rabbit/src/rabbit_peer_discovery_classic_config.erl
@@ -26,7 +26,8 @@ list_nodes() ->
Nodes when is_list(Nodes) -> {ok, {Nodes, disc}}
end.
--spec lock(Node :: node()) -> {ok, {ResourceId :: string(), LockRequesterId :: node()}} | {error, Reason :: string()}.
+-spec lock(Node :: node()) -> {ok, {{ResourceId :: string(), LockRequesterId :: node()}, Nodes :: [node()]}} |
+ {error, Reason :: string()}.
lock(Node) ->
{ok, {Nodes, _NodeType}} = list_nodes(),
@@ -40,15 +41,15 @@ lock(Node) ->
Retries = rabbit_nodes:lock_retries(),
case global:set_lock(LockId, Nodes, Retries) of
true ->
- {ok, LockId};
+ {ok, {LockId, Nodes}};
false ->
{error, io_lib:format("Acquiring lock taking too long, bailing out after ~b retries", [Retries])}
end.
--spec unlock({ResourceId :: string(), LockRequesterId :: node()}) -> ok.
+-spec unlock({{ResourceId :: string(), LockRequesterId :: node()}, Nodes :: [node()]}) ->
+ ok | {error, Reason :: string()}.
-unlock(LockId) ->
- {ok, {Nodes, _NodeType}} = list_nodes(),
+unlock({LockId, Nodes}) ->
global:del_lock(LockId, Nodes),
ok.
diff --git a/deps/rabbitmq_peer_discovery_aws/src/rabbit_peer_discovery_aws.erl b/deps/rabbitmq_peer_discovery_aws/src/rabbit_peer_discovery_aws.erl
index 7c355d4094..3df6890b54 100644
--- a/deps/rabbitmq_peer_discovery_aws/src/rabbit_peer_discovery_aws.erl
+++ b/deps/rabbitmq_peer_discovery_aws/src/rabbit_peer_discovery_aws.erl
@@ -114,7 +114,8 @@ unregister() ->
post_registration() ->
ok.
--spec lock(Node :: node()) -> {ok, {ResourceId :: string(), LockRequesterId :: node()}} | {error, Reason :: string()}.
+-spec lock(Node :: node()) -> {ok, {{ResourceId :: string(), LockRequesterId :: node()}, Nodes :: [node()]}} |
+ {error, Reason :: string()}.
lock(Node) ->
%% call list_nodes/0 externally such that meck can mock the function
@@ -129,7 +130,7 @@ lock(Node) ->
Retries = rabbit_nodes:lock_retries(),
case global:set_lock(LockId, Nodes, Retries) of
true ->
- {ok, LockId};
+ {ok, {LockId, Nodes}};
false ->
{error, io_lib:format("Acquiring lock taking too long, bailing out after ~b retries", [Retries])}
end;
@@ -142,16 +143,12 @@ lock(Node) ->
Error
end.
--spec unlock({ResourceId :: string(), LockRequesterId :: node()}) -> ok | {error, Reason :: string()}.
+-spec unlock({{ResourceId :: string(), LockRequesterId :: node()}, Nodes :: [node()]}) ->
+ ok | {error, Reason :: string()}.
-unlock(LockId) ->
- case ?MODULE:list_nodes() of
- {ok, {Nodes, disc}} ->
- global:del_lock(LockId, Nodes),
- ok;
- {error, _} = Error ->
- Error
- end.
+unlock({LockId, Nodes}) ->
+ global:del_lock(LockId, Nodes),
+ ok.
%%
%% Implementation
diff --git a/deps/rabbitmq_peer_discovery_aws/test/unit_SUITE.erl b/deps/rabbitmq_peer_discovery_aws/test/unit_SUITE.erl
index 61e8ed2561..e9c3d14112 100644
--- a/deps/rabbitmq_peer_discovery_aws/test/unit_SUITE.erl
+++ b/deps/rabbitmq_peer_discovery_aws/test/unit_SUITE.erl
@@ -75,23 +75,26 @@ registration_support(_Config) ->
lock_single_node(_Config) ->
LocalNode = node(),
- meck:expect(rabbit_peer_discovery_aws, list_nodes, 0, {ok, {[LocalNode], disc}}),
+ Nodes = [LocalNode],
+ meck:expect(rabbit_peer_discovery_aws, list_nodes, 0, {ok, {Nodes, disc}}),
- {ok, LockId} = rabbit_peer_discovery_aws:lock(LocalNode),
- ?assertEqual(ok, rabbit_peer_discovery_aws:unlock(LockId)).
+ {ok, {LockId, Nodes}} = rabbit_peer_discovery_aws:lock(LocalNode),
+ ?assertEqual(ok, rabbit_peer_discovery_aws:unlock({LockId, Nodes})).
lock_multiple_nodes(_Config) ->
application:set_env(rabbit, cluster_formation, [{internal_lock_retries, 2}]),
LocalNode = node(),
OtherNode = other@host,
- meck:expect(rabbit_peer_discovery_aws, list_nodes, 0, {ok, {[OtherNode, LocalNode], disc}}),
+ Nodes = [OtherNode, LocalNode],
+ meck:expect(rabbit_peer_discovery_aws, list_nodes, 0, {ok, {Nodes, disc}}),
- {ok, {LockResourceId, OtherNode}} = rabbit_peer_discovery_aws:lock(OtherNode),
+ {ok, {{LockResourceId, OtherNode}, Nodes}} = rabbit_peer_discovery_aws:lock(OtherNode),
?assertEqual({error, "Acquiring lock taking too long, bailing out after 2 retries"},
rabbit_peer_discovery_aws:lock(LocalNode)),
- ?assertEqual(ok, rabbitmq_peer_discovery_aws:unlock({LockResourceId, OtherNode})),
- ?assertEqual({ok, {LockResourceId, LocalNode}}, rabbit_peer_discovery_aws:lock(LocalNode)),
- ?assertEqual(ok, rabbitmq_peer_discovery_aws:unlock({LockResourceId, LocalNode})).
+ ?assertEqual(ok, rabbitmq_peer_discovery_aws:unlock({{LockResourceId, OtherNode}, Nodes})),
+
+ ?assertEqual({ok, {{LockResourceId, LocalNode}, Nodes}}, rabbit_peer_discovery_aws:lock(LocalNode)),
+ ?assertEqual(ok, rabbitmq_peer_discovery_aws:unlock({{LockResourceId, LocalNode}, Nodes})).
lock_local_node_not_discovered(_Config) ->
meck:expect(rabbit_peer_discovery_aws, list_nodes, 0, {ok, {[n1@host, n2@host], disc}} ),
diff --git a/deps/rabbitmq_peer_discovery_k8s/src/rabbit_peer_discovery_k8s.erl b/deps/rabbitmq_peer_discovery_k8s/src/rabbit_peer_discovery_k8s.erl
index 7075d0e5ca..117be80bc1 100644
--- a/deps/rabbitmq_peer_discovery_k8s/src/rabbit_peer_discovery_k8s.erl
+++ b/deps/rabbitmq_peer_discovery_k8s/src/rabbit_peer_discovery_k8s.erl
@@ -68,7 +68,8 @@ register() ->
unregister() ->
ok.
--spec lock(Node :: node()) -> {ok, {ResourceId :: string(), LockRequesterId :: node()}} | {error, Reason :: string()}.
+-spec lock(Node :: node()) -> {ok, {{ResourceId :: string(), LockRequesterId :: node()}, Nodes :: [node()]}} |
+ {error, Reason :: string()}.
lock(Node) ->
%% call list_nodes/0 externally such that meck can mock the function
@@ -81,7 +82,7 @@ lock(Node) ->
Retries = rabbit_nodes:lock_retries(),
case global:set_lock(LockId, Nodes, Retries) of
true ->
- {ok, LockId};
+ {ok, {LockId, Nodes}};
false ->
{error, io_lib:format("Acquiring lock taking too long, bailing out after ~b retries", [Retries])}
end;
@@ -95,16 +96,12 @@ lock(Node) ->
Error
end.
--spec unlock({ResourceId :: string(), LockRequesterId :: node()}) -> ok | {error, Reason :: string()}.
+-spec unlock({{ResourceId :: string(), LockRequesterId :: node()}, Nodes :: [node()]}) ->
+ ok | {error, Reason :: string()}.
-unlock(LockId) ->
- case ?MODULE:list_nodes() of
- {ok, {Nodes, disc}} ->
- global:del_lock(LockId, Nodes),
- ok;
- {error, _} = Error ->
- Error
- end.
+unlock({LockId, Nodes}) ->
+ global:del_lock(LockId, Nodes),
+ ok.
%%
%% Implementation
diff --git a/deps/rabbitmq_peer_discovery_k8s/test/rabbitmq_peer_discovery_k8s_SUITE.erl b/deps/rabbitmq_peer_discovery_k8s/test/rabbitmq_peer_discovery_k8s_SUITE.erl
index 45b41f802b..88fe2843f2 100644
--- a/deps/rabbitmq_peer_discovery_k8s/test/rabbitmq_peer_discovery_k8s_SUITE.erl
+++ b/deps/rabbitmq_peer_discovery_k8s/test/rabbitmq_peer_discovery_k8s_SUITE.erl
@@ -145,22 +145,24 @@ event_v1_test(_Config) ->
lock_single_node(_Config) ->
LocalNode = node(),
+ Nodes = [LocalNode],
meck:expect(rabbit_peer_discovery_k8s, list_nodes, 0, {ok, {[LocalNode], disc}}),
- {ok, LockId} = rabbit_peer_discovery_k8s:lock(LocalNode),
- ?assertEqual(ok, rabbit_peer_discovery_k8s:unlock(LockId)).
+ {ok, {LockId, Nodes}} = rabbit_peer_discovery_k8s:lock(LocalNode),
+ ?assertEqual(ok, rabbit_peer_discovery_k8s:unlock({LockId, Nodes})).
lock_multiple_nodes(_Config) ->
application:set_env(rabbit, cluster_formation, [{internal_lock_retries, 2}]),
LocalNode = node(),
OtherNode = other@host,
- meck:expect(rabbit_peer_discovery_k8s, list_nodes, 0, {ok, {[OtherNode, LocalNode], disc}}),
+ Nodes = [OtherNode, LocalNode],
+ meck:expect(rabbit_peer_discovery_k8s, list_nodes, 0, {ok, {Nodes, disc}}),
- {ok, {LockResourceId, OtherNode}} = rabbit_peer_discovery_k8s:lock(OtherNode),
+ {ok, {{LockResourceId, OtherNode}, Nodes}} = rabbit_peer_discovery_k8s:lock(OtherNode),
?assertEqual({error, "Acquiring lock taking too long, bailing out after 2 retries"}, rabbit_peer_discovery_k8s:lock(LocalNode)),
- ?assertEqual(ok, rabbitmq_peer_discovery_k8s:unlock({LockResourceId, OtherNode})),
- ?assertEqual({ok, {LockResourceId, LocalNode}}, rabbit_peer_discovery_k8s:lock(LocalNode)),
- ?assertEqual(ok, rabbitmq_peer_discovery_k8s:unlock({LockResourceId, LocalNode})).
+ ?assertEqual(ok, rabbitmq_peer_discovery_k8s:unlock({{LockResourceId, OtherNode}, Nodes})),
+ ?assertEqual({ok, {{LockResourceId, LocalNode}, Nodes}}, rabbit_peer_discovery_k8s:lock(LocalNode)),
+ ?assertEqual(ok, rabbitmq_peer_discovery_k8s:unlock({{LockResourceId, LocalNode}, Nodes})).
lock_local_node_not_discovered(_Config) ->
meck:expect(rabbit_peer_discovery_k8s, list_nodes, 0, {ok, {[n1@host, n2@host], disc}} ),